diff --git a/.circleci/config.yml b/.circleci/config.yml
index 051e690b7..43c309133 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -11,20 +11,60 @@
 #
 version: 2.1

+# A template that can be shared between the two different image-building
+# workflows.
+.images: &IMAGES
+  jobs:
+    # Every job that pushes a Docker image to Docker Hub needs to provide
+    # credentials.  Use this first job to define a yaml anchor that can be
+    # used to supply a CircleCI job context which makes Docker Hub credentials
+    # available in the environment.
+    #
+    # Contexts are managed in the CircleCI web interface:
+    #
+    #   https://app.circleci.com/settings/organization/github/tahoe-lafs/contexts
+    - "build-image-debian-11": &DOCKERHUB_CONTEXT
+        context: "dockerhub-auth"
+    - "build-image-ubuntu-20-04":
+        <<: *DOCKERHUB_CONTEXT
+    - "build-image-fedora-35":
+        <<: *DOCKERHUB_CONTEXT
+    - "build-image-oraclelinux-8":
+        <<: *DOCKERHUB_CONTEXT
+    # Restore later as PyPy38
+    #- "build-image-pypy27-buster":
+    #    <<: *DOCKERHUB_CONTEXT
+
+parameters:
+  # Control whether the image-building workflow runs as part of this pipeline.
+  # Generally we do not want this to run because we don't need our
+  # dependencies to move around all the time and because building the image
+  # takes a couple of minutes.
+  #
+  # An easy way to trigger a pipeline with this set to true is with the
+  # rebuild-images.sh tool in this directory.  You can also do so via the
+  # CircleCI web UI.
+  build-images:
+    default: false
+    type: "boolean"
+
+  # Control whether the test-running workflow runs as part of this pipeline.
+  # Generally we do want this to run because running the tests is the primary
+  # purpose of this pipeline.
+  run-tests:
+    default: true
+    type: "boolean"
+
 workflows:
   ci:
+    when: "<< pipeline.parameters.run-tests >>"
     jobs:
       # Start with jobs testing various platforms.
-      - "debian-10":
-          {}
       - "debian-11":
           {}

       - "ubuntu-20-04":
           {}

-      - "ubuntu-18-04":
-          requires:
-            - "ubuntu-20-04"
-
       # Equivalent to RHEL 8; CentOS 8 is dead.
       - "oraclelinux-8":
@@ -65,41 +105,10 @@ workflows:
           {}

   images:
-    # Build the Docker images used by the ci jobs.  This makes the ci jobs
-    # faster and takes various spurious failures out of the critical path.
-    triggers:
-      # Build once a day
-      - schedule:
-          cron: "0 0 * * *"
-          filters:
-            branches:
-              only:
-                - "master"
+    <<: *IMAGES

-    jobs:
-      # Every job that pushes a Docker image from Docker Hub needs to provide
-      # credentials.  Use this first job to define a yaml anchor that can be
-      # used to supply a CircleCI job context which makes Docker Hub
-      # credentials available in the environment.
-      #
-      # Contexts are managed in the CircleCI web interface:
-      #
-      #   https://app.circleci.com/settings/organization/github/tahoe-lafs/contexts
-      - "build-image-debian-10": &DOCKERHUB_CONTEXT
-          context: "dockerhub-auth"
-      - "build-image-debian-11":
-          <<: *DOCKERHUB_CONTEXT
-      - "build-image-ubuntu-18-04":
-          <<: *DOCKERHUB_CONTEXT
-      - "build-image-ubuntu-20-04":
-          <<: *DOCKERHUB_CONTEXT
-      - "build-image-fedora-35":
-          <<: *DOCKERHUB_CONTEXT
-      - "build-image-oraclelinux-8":
-          <<: *DOCKERHUB_CONTEXT
-      # Restore later as PyPy38
-      #- "build-image-pypy27-buster":
-      #    <<: *DOCKERHUB_CONTEXT
+    # Build as part of the workflow but only if requested.
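+    #
+    # For example, a pipeline triggered with the parameter values
+    # {"build-images": true, "run-tests": false} (exactly what the
+    # rebuild-images.sh script below posts) runs only this workflow and
+    # skips the usual test jobs.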
+    when: "<< pipeline.parameters.build-images >>"

 jobs:
@@ -133,10 +142,10 @@ jobs:
     steps:
       - "checkout"

-      - run:
+      - run: &INSTALL_TOX
           name: "Install tox"
           command: |
-            pip install --user tox
+            pip install --user 'tox~=3.0'

       - run:
           name: "Static-ish code checks"
@@ -152,9 +161,7 @@ jobs:
       - "checkout"

       - run:
-          name: "Install tox"
-          command: |
-            pip install --user tox
+          <<: *INSTALL_TOX

       - run:
           name: "Make PyInstaller executable"
@@ -169,12 +176,7 @@ jobs:
           command: |
             dist/Tahoe-LAFS/tahoe --version

-  debian-10: &DEBIAN
-    docker:
-      - <<: *DOCKERHUB_AUTH
-        image: "tahoelafsci/debian:10-py3.7"
-        user: "nobody"
-
+  debian-11: &DEBIAN
     environment: &UTF_8_ENVIRONMENT
       # In general, the test suite is not allowed to fail while the job
       # succeeds.  But you can set this to "yes" if you want it to be
@@ -186,7 +188,7 @@ jobs:
       # filenames and argv).
       LANG: "en_US.UTF-8"
       # Select a tox environment to run for this job.
-      TAHOE_LAFS_TOX_ENVIRONMENT: "py37"
+      TAHOE_LAFS_TOX_ENVIRONMENT: "py39"
       # Additional arguments to pass to tox.
       TAHOE_LAFS_TOX_ARGS: ""
       # The path in which test artifacts will be placed.
@@ -254,15 +256,11 @@ jobs:
             /tmp/venv/bin/codecov
           fi

-  debian-11:
-    <<: *DEBIAN
     docker:
       - <<: *DOCKERHUB_AUTH
         image: "tahoelafsci/debian:11-py3.9"
         user: "nobody"

-    environment:
-      <<: *UTF_8_ENVIRONMENT
-      TAHOE_LAFS_TOX_ENVIRONMENT: "py39"
+

   # Restore later using PyPy3.8
   # pypy27-buster:
@@ -314,22 +312,6 @@ jobs:
       - run: *SETUP_VIRTUALENV
       - run: *RUN_TESTS

-  ubuntu-18-04: &UBUNTU_18_04
-    <<: *DEBIAN
-    docker:
-      - <<: *DOCKERHUB_AUTH
-        image: "tahoelafsci/ubuntu:18.04-py3.7"
-        user: "nobody"
-
-    environment:
-      <<: *UTF_8_ENVIRONMENT
-      # The default trial args include --rterrors which is incompatible with
-      # this reporter on Python 3.  So drop that and just specify the
-      # reporter.
-      TAHOE_LAFS_TRIAL_ARGS: "--reporter=subunitv2-file"
-      TAHOE_LAFS_TOX_ENVIRONMENT: "py37"
-
   ubuntu-20-04:
     <<: *DEBIAN
     docker:
@@ -382,7 +364,7 @@ jobs:
     docker:
       # Run in a highly Nix-capable environment.
       - <<: *DOCKERHUB_AUTH
-        image: "nixos/nix:2.3.16"
+        image: "nixos/nix:2.10.3"

     environment:
       # CACHIX_AUTH_TOKEN is manually set in the CircleCI web UI and
@@ -392,27 +374,21 @@ jobs:

     steps:
       - "run":
-          # The nixos/nix image does not include ssh.  Install it so the
-          # `checkout` step will succeed.  We also want cachix for
-          # Nix-friendly caching.
+          # Get cachix for Nix-friendly caching.
           name: "Install Basic Dependencies"
           command: |
+            NIXPKGS="https://github.com/nixos/nixpkgs/archive/nixos-<<parameters.nixpkgs>>.tar.gz"
             nix-env \
-              --file https://github.com/nixos/nixpkgs/archive/nixos-<<parameters.nixpkgs>>.tar.gz \
+              --file $NIXPKGS \
               --install \
-              -A openssh cachix bash
+              -A cachix bash
+            # Activate it for "binary substitution".  This sets up
+            # configuration that lets Nix download something from the cache
+            # instead of building it locally, if possible.
+            cachix use "${CACHIX_NAME}"

       - "checkout"

-      - run:
-          name: "Cachix setup"
-          # Record the store paths that exist before we did much.  There's no
-          # reason to cache these, they're either in the image or have to be
-          # retrieved before we can use cachix to restore from cache.
-          command: |
-            cachix use "${CACHIX_NAME}"
-            nix path-info --all > /tmp/store-path-pre-build
-
       - "run":
           # The Nix package doesn't know how to do this part, unfortunately.
           name: "Generate version"
@@ -434,55 +410,26 @@ jobs:
             # build a couple simple little dependencies that don't take
             # advantage of multiple cores and we get a little speedup by doing
             # them in parallel.
-            nix-build --cores 3 --max-jobs 2 --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>"
+            source .circleci/lib.sh
+            cache_if_able nix-build \
+              --cores 3 \
+              --max-jobs 2 \
+              --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>"

       - "run":
           name: "Test"
           command: |
             # Let it go somewhat wild for the test suite itself
-            nix-build --cores 8 --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>" tests.nix
-
-      - run:
-          # Send any new store objects to cachix.
-          name: "Push to Cachix"
-          when: "always"
-          command: |
-            # Cribbed from
-            # https://circleci.com/blog/managing-secrets-when-you-have-pull-requests-from-outside-contributors/
-            if [ -n "$CIRCLE_PR_NUMBER" ]; then
-              # I'm sure you're thinking "CIRCLE_PR_NUMBER must just be the
-              # number of the PR being built".  Sorry, dear reader, you have
-              # guessed poorly.  It is also conditionally set based on whether
-              # this is a PR from a fork or not.
-              #
-              # https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables
-              echo "Skipping Cachix push for forked PR."
-            else
-              # If this *isn't* a build from a fork then we have the Cachix
-              # write key in our environment and we can push any new objects
-              # to Cachix.
-              #
-              # To decide what to push, we inspect the list of store objects
-              # that existed before and after we did most of our work.  Any
-              # that are new after the work is probably a useful thing to have
-              # around so push it to the cache.  We exclude all derivation
-              # objects (.drv files) because they're cheap to reconstruct and
-              # by the time you know their cache key you've already done all
-              # the work anyway.
-              #
-              # This shell expression for finding the objects and pushing them
-              # was from the Cachix docs:
-              #
-              #   https://docs.cachix.org/continuous-integration-setup/circleci.html
-              #
-              # but they seem to have removed it now.
-              bash -c "comm -13 <(sort /tmp/store-path-pre-build | grep -v '\.drv$') <(nix path-info --all | grep -v '\.drv$' | sort) | cachix push $CACHIX_NAME"
-            fi
+            source .circleci/lib.sh
+            cache_if_able nix-build \
+              --cores 8 \
+              --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>" \
+              tests.nix

   typechecks:
     docker:
       - <<: *DOCKERHUB_AUTH
-        image: "tahoelafsci/ubuntu:18.04-py3.7"
+        image: "tahoelafsci/ubuntu:20.04-py3.9"

     steps:
       - "checkout"
@@ -494,7 +441,7 @@ jobs:
   docs:
     docker:
       - <<: *DOCKERHUB_AUTH
-        image: "tahoelafsci/ubuntu:18.04-py3.7"
+        image: "tahoelafsci/ubuntu:20.04-py3.9"

     steps:
       - "checkout"
@@ -545,15 +492,6 @@ jobs:
             docker push tahoelafsci/${DISTRO}:${TAG}-py${PYTHON_VERSION}

-  build-image-debian-10:
-    <<: *BUILD_IMAGE
-
-    environment:
-      DISTRO: "debian"
-      TAG: "10"
-      PYTHON_VERSION: "3.7"
-
-
   build-image-debian-11:
     <<: *BUILD_IMAGE

@@ -562,14 +500,6 @@ jobs:
       TAG: "11"
       PYTHON_VERSION: "3.9"

-  build-image-ubuntu-18-04:
-    <<: *BUILD_IMAGE
-
-    environment:
-      DISTRO: "ubuntu"
-      TAG: "18.04"
-      PYTHON_VERSION: "3.7"
-
   build-image-ubuntu-20-04:
     <<: *BUILD_IMAGE
diff --git a/.circleci/lib.sh b/.circleci/lib.sh
new file mode 100644
index 000000000..7717cdb18
--- /dev/null
+++ b/.circleci/lib.sh
@@ -0,0 +1,26 @@
+# Run a command, enabling cache writes to cachix if possible.  The command is
+# accepted as a variable number of positional arguments (like argv).
+function cache_if_able() {
+    # The `cachix watch-exec ...` command does our cache population.  When it
+    # sees something added to the store (I guess) it pushes it to the named
+    # cache.
+    #
+    # We can only *push* to it if we have a CACHIX_AUTH_TOKEN, though.
+    # In-repo jobs will get this from CircleCI configuration but jobs from
+    # forks may not.
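+    #
+    # For example, `cache_if_able nix-build tests.nix` runs
+    # `cachix watch-exec "$CACHIX_NAME" -- nix-build tests.nix` when the
+    # token is available, and plain `nix-build tests.nix` when it is not.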
+ echo "Building PR from user/org: ${CIRCLE_PROJECT_USERNAME}" + if [ -v CACHIX_AUTH_TOKEN ]; then + echo "Cachix credentials present; will attempt to write to cache." + cachix watch-exec "${CACHIX_NAME}" -- "$@" + else + # If we're building a from a forked repository then we're allowed to + # not have the credentials (but it's also fine if the owner of the + # fork supplied their own). + if [ "${CIRCLE_PROJECT_USERNAME}" == "tahoe-lafs" ]; then + echo "Required credentials (CACHIX_AUTH_TOKEN) are missing." + return 1 + else + echo "Cachix credentials missing; will not attempt cache writes." + "$@" + fi + fi +} diff --git a/.circleci/populate-wheelhouse.sh b/.circleci/populate-wheelhouse.sh index 519a80cac..857171979 100755 --- a/.circleci/populate-wheelhouse.sh +++ b/.circleci/populate-wheelhouse.sh @@ -9,7 +9,7 @@ BASIC_DEPS="pip wheel" # Python packages we need to support the test infrastructure. *Not* packages # Tahoe-LAFS itself (implementation or test suite) need. -TEST_DEPS="tox codecov" +TEST_DEPS="tox~=3.0 codecov" # Python packages we need to generate test reports for CI infrastructure. # *Not* packages Tahoe-LAFS itself (implement or test suite) need. diff --git a/.circleci/rebuild-images.sh b/.circleci/rebuild-images.sh new file mode 100755 index 000000000..901651905 --- /dev/null +++ b/.circleci/rebuild-images.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Get your API token here: +# https://app.circleci.com/settings/user/tokens +API_TOKEN=$1 +shift + +# Name the branch you want to trigger the build for +BRANCH=$1 +shift + +curl \ + --verbose \ + --request POST \ + --url https://circleci.com/api/v2/project/gh/tahoe-lafs/tahoe-lafs/pipeline \ + --header "Circle-Token: $API_TOKEN" \ + --header "content-type: application/json" \ + --data '{"branch":"'"$BRANCH"'","parameters":{"build-images":true,"run-tests":false}}' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 15e7d8fa4..4d67f09bd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,21 +48,20 @@ jobs: - windows-latest - ubuntu-latest python-version: - - "3.7" - "3.8" - "3.9" - "3.10" include: - # On macOS don't bother with 3.7-3.8, just to get faster builds. + # On macOS don't bother with 3.8, just to get faster builds. - os: macos-latest python-version: "3.9" - os: macos-latest python-version: "3.10" # We only support PyPy on Linux at the moment. - - os: ubuntu-latest - python-version: "pypy-3.7" - os: ubuntu-latest python-version: "pypy-3.8" + - os: ubuntu-latest + python-version: "pypy-3.9" steps: # See https://github.com/actions/checkout. A fetch-depth of 0 @@ -80,15 +79,27 @@ jobs: - name: Install Python packages run: | - pip install --upgrade codecov tox tox-gh-actions setuptools + pip install --upgrade codecov "tox<4" tox-gh-actions setuptools pip list - name: Display tool versions run: python misc/build_helpers/show-tool-versions.py - name: Run tox for corresponding Python version + if: ${{ !contains(matrix.os, 'windows') }} run: python -m tox + # On Windows, a non-blocking pipe might respond (when emulating Unix-y + # API) with ENOSPC to indicate buffer full. Trial doesn't handle this + # well, so it breaks test runs. To attempt to solve this, we pipe the + # output through passthrough.py that will hopefully be able to do the right + # thing by using Windows APIs. 
+ - name: Run tox for corresponding Python version + if: ${{ contains(matrix.os, 'windows') }} + run: | + pip install twisted pywin32 + python -m tox | python misc/windows-enospc/passthrough.py + - name: Upload eliot.log uses: actions/upload-artifact@v3 with: @@ -153,19 +164,18 @@ jobs: strategy: fail-fast: false matrix: - os: - - windows-latest + include: + - os: macos-latest + python-version: "3.9" + force-foolscap: false + - os: windows-latest + python-version: "3.9" + force-foolscap: false # 22.04 has some issue with Tor at the moment: # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3943 - - ubuntu-20.04 - python-version: - - 3.7 - - 3.9 - include: - # On macOS don't bother with 3.7, just to get faster builds. - - os: macos-latest - python-version: 3.9 - + - os: ubuntu-20.04 + python-version: "3.9" + force-foolscap: false steps: - name: Install Tor [Ubuntu] @@ -199,19 +209,31 @@ jobs: - name: Install Python packages run: | - pip install --upgrade tox + pip install --upgrade "tox<4" pip list - name: Display tool versions run: python misc/build_helpers/show-tool-versions.py - name: Run "Python 3 integration tests" + if: "${{ !matrix.force-foolscap }}" env: # On macOS this is necessary to ensure unix socket paths for tor # aren't too long. On Windows tox won't pass it through so it has no # effect. On Linux it doesn't make a difference one way or another. TMPDIR: "/tmp" - run: tox -e integration + run: | + tox -e integration + + - name: Run "Python 3 integration tests (force Foolscap)" + if: "${{ matrix.force-foolscap }}" + env: + # On macOS this is necessary to ensure unix socket paths for tor + # aren't too long. On Windows tox won't pass it through so it has no + # effect. On Linux it doesn't make a difference one way or another. + TMPDIR: "/tmp" + run: | + tox -e integration -- --force-foolscap integration/ - name: Upload eliot.log in case of failure uses: actions/upload-artifact@v3 @@ -247,7 +269,7 @@ jobs: - name: Install Python packages run: | - pip install --upgrade tox + pip install --upgrade "tox<4" pip list - name: Display tool versions diff --git a/README.rst b/README.rst index 317378fae..bbf88610d 100644 --- a/README.rst +++ b/README.rst @@ -56,7 +56,7 @@ Once ``tahoe --version`` works, see `How to Run Tahoe-LAFS `__ 🐍 Python 2 ----------- -Python 3.7 or later is now required. +Python 3.8 or later is required. If you are still using Python 2.7, use Tahoe-LAFS version 1.17.1. diff --git a/default.nix b/default.nix index 5f4db2c78..e4f2dd4d4 100644 --- a/default.nix +++ b/default.nix @@ -29,7 +29,7 @@ in , pypiData ? sources.pypi-deps-db # the pypi package database snapshot to use # for dependency resolution -, pythonVersion ? "python37" # a string choosing the python derivation from +, pythonVersion ? "python39" # a string choosing the python derivation from # nixpkgs to target , extras ? [ "tor" "i2p" ] # a list of strings identifying tahoe-lafs extras, diff --git a/integration/conftest.py b/integration/conftest.py index e284b5cba..5cbe9ad6b 100644 --- a/integration/conftest.py +++ b/integration/conftest.py @@ -1,15 +1,6 @@ """ Ported to Python 3. 
""" -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - import sys import shutil from time import sleep @@ -66,6 +57,13 @@ def pytest_addoption(parser): "--coverage", action="store_true", dest="coverage", help="Collect coverage statistics", ) + parser.addoption( + "--force-foolscap", action="store_true", default=False, + dest="force_foolscap", + help=("If set, force Foolscap only for the storage protocol. " + + "Otherwise HTTP will be used.") + ) + @pytest.fixture(autouse=True, scope='session') def eliot_logging(): diff --git a/integration/util.py b/integration/util.py index ad9249e45..7d885ee6c 100644 --- a/integration/util.py +++ b/integration/util.py @@ -1,14 +1,6 @@ """ Ported to Python 3. """ -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import sys import time @@ -300,6 +292,14 @@ def _create_node(reactor, request, temp_dir, introducer_furl, flog_gatherer, nam u'log_gatherer.furl', flog_gatherer, ) + force_foolscap = request.config.getoption("force_foolscap") + assert force_foolscap in (True, False) + set_config( + config, + 'storage', + 'force_foolscap', + str(force_foolscap), + ) write_config(FilePath(config_path), config) created_d.addCallback(created) diff --git a/misc/windows-enospc/passthrough.py b/misc/windows-enospc/passthrough.py new file mode 100644 index 000000000..1d4cd48bb --- /dev/null +++ b/misc/windows-enospc/passthrough.py @@ -0,0 +1,36 @@ +""" +Writing to non-blocking pipe can result in ENOSPC when using Unix APIs on +Windows. So, this program passes through data from stdin to stdout, using +Windows APIs instead of Unix-y APIs. 
+""" + +from twisted.internet.stdio import StandardIO +from twisted.internet import reactor +from twisted.internet.protocol import Protocol +from twisted.internet.interfaces import IHalfCloseableProtocol +from twisted.internet.error import ReactorNotRunning +from zope.interface import implementer + +@implementer(IHalfCloseableProtocol) +class Passthrough(Protocol): + def readConnectionLost(self): + self.transport.loseConnection() + + def writeConnectionLost(self): + try: + reactor.stop() + except ReactorNotRunning: + pass + + def dataReceived(self, data): + self.transport.write(data) + + def connectionLost(self, reason): + try: + reactor.stop() + except ReactorNotRunning: + pass + + +std = StandardIO(Passthrough()) +reactor.run() diff --git a/newsfragments/3870.minor b/newsfragments/3870.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3914.minor b/newsfragments/3914.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3937.minor b/newsfragments/3937.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3939.bugfix b/newsfragments/3939.bugfix new file mode 100644 index 000000000..9d2071d32 --- /dev/null +++ b/newsfragments/3939.bugfix @@ -0,0 +1 @@ +Uploading immutables will now better use available bandwidth, which should allow for faster uploads in many cases. \ No newline at end of file diff --git a/newsfragments/3942.minor b/newsfragments/3942.minor new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/newsfragments/3942.minor @@ -0,0 +1 @@ + diff --git a/newsfragments/3947.minor b/newsfragments/3947.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3950.minor b/newsfragments/3950.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3953.minor b/newsfragments/3953.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3954.minor b/newsfragments/3954.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3956.minor b/newsfragments/3956.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3958.minor b/newsfragments/3958.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3960.minor b/newsfragments/3960.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3962.feature b/newsfragments/3962.feature new file mode 100644 index 000000000..86cf62781 --- /dev/null +++ b/newsfragments/3962.feature @@ -0,0 +1 @@ +Mutable objects can now be created with a pre-determined "signature key" using the ``tahoe put`` CLI or the HTTP API. This enables deterministic creation of mutable capabilities. This feature must be used with care to preserve the normal security and reliability properties. \ No newline at end of file diff --git a/newsfragments/3964.removed b/newsfragments/3964.removed new file mode 100644 index 000000000..d022f94af --- /dev/null +++ b/newsfragments/3964.removed @@ -0,0 +1 @@ +Python 3.7 is no longer supported, and Debian 10 and Ubuntu 18.04 are no longer tested. \ No newline at end of file diff --git a/newsfragments/3966.bugfix b/newsfragments/3966.bugfix new file mode 100644 index 000000000..ead94c47c --- /dev/null +++ b/newsfragments/3966.bugfix @@ -0,0 +1 @@ +Fix incompatibility with newer versions of the transitive charset_normalizer dependency when using PyInstaller. 
\ No newline at end of file diff --git a/setup.py b/setup.py index 480cb0d88..b211bcd79 100644 --- a/setup.py +++ b/setup.py @@ -96,7 +96,9 @@ install_requires = [ # an sftp extra in Tahoe-LAFS, there is no point in having one. # * Twisted 19.10 introduces Site.getContentFile which we use to get # temporary upload files placed into a per-node temporary directory. - "Twisted[tls,conch] >= 19.10.0", + # * Twisted 22.8.0 added support for coroutine-returning functions in many + # places (mainly via `maybeDeferred`) + "Twisted[tls,conch] >= 22.8.0", "PyYAML >= 3.11", @@ -137,11 +139,22 @@ install_requires = [ "werkzeug != 2.2.0", "treq", "cbor2", - "pycddl >= 0.2", + # Ideally we want 0.4+ to be able to pass in mmap(), but it's not strictly + # necessary yet until we fix the workaround to + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3963 in + # allmydata.storage.http_server. + "pycddl", # for pid-file support "psutil", "filelock", + + # treq needs requests, requests needs charset_normalizer, + # charset_normalizer breaks PyInstaller + # (https://github.com/Ousret/charset_normalizer/issues/253). So work around + # this by using a lower version number. Once upstream issue is fixed, or + # requests drops charset_normalizer, this can go away. + "charset_normalizer < 3", ] setup_requires = [ @@ -221,7 +234,7 @@ def run_command(args, cwd=None): use_shell = sys.platform == "win32" try: p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd, shell=use_shell) - except EnvironmentError as e: # if this gives a SyntaxError, note that Tahoe-LAFS requires Python 3.7+ + except EnvironmentError as e: # if this gives a SyntaxError, note that Tahoe-LAFS requires Python 3.8+ print("Warning: unable to run %r." % (" ".join(args),)) print(e) return None @@ -372,8 +385,8 @@ setup(name="tahoe-lafs", # also set in __init__.py package_dir = {'':'src'}, packages=find_packages('src') + ['allmydata.test.plugins'], classifiers=trove_classifiers, - # We support Python 3.7 or later. 3.11 is not supported yet. - python_requires=">=3.7, <3.11", + # We support Python 3.8 or later. 3.11 is not supported yet. + python_requires=">=3.8, <3.11", install_requires=install_requires, extras_require={ # Duplicate the Twisted pywin32 dependency here. See @@ -386,9 +399,6 @@ setup(name="tahoe-lafs", # also set in __init__.py ], "test": [ "flake8", - # On Python 3.7, importlib_metadata v5 breaks flake8. - # https://github.com/python/importlib_metadata/issues/407 - "importlib_metadata<5; python_version < '3.8'", # Pin a specific pyflakes so we don't have different folks # disagreeing on what is or is not a lint issue. 
We can bump
             # this version from time to time, but we will do it
@@ -396,7 +406,7 @@ setup(name="tahoe-lafs", # also set in __init__.py
             "pyflakes == 2.2.0",
             "coverage ~= 5.0",
             "mock",
-            "tox",
+            "tox ~= 3.0",
             "pytest",
             "pytest-twisted",
             "hypothesis >= 3.6.1",
diff --git a/src/allmydata/client.py b/src/allmydata/client.py
index 1a158a1aa..73672f30a 100644
--- a/src/allmydata/client.py
+++ b/src/allmydata/client.py
@@ -32,6 +32,7 @@ from allmydata.storage.server import StorageServer, FoolscapStorageServer
 from allmydata import storage_client
 from allmydata.immutable.upload import Uploader
 from allmydata.immutable.offloaded import Helper
+from allmydata.mutable.filenode import MutableFileNode
 from allmydata.introducer.client import IntroducerClient
 from allmydata.util import (
     hashutil, base32, pollmixin, log, idlib,
@@ -1086,9 +1087,40 @@ class _Client(node.Node, pollmixin.PollMixin):
     def create_immutable_dirnode(self, children, convergence=None):
         return self.nodemaker.create_immutable_directory(children, convergence)

-    def create_mutable_file(self, contents=None, version=None):
+    def create_mutable_file(
+            self,
+            contents: bytes | None = None,
+            version: int | None = None,
+            *,
+            unique_keypair: tuple[rsa.PublicKey, rsa.PrivateKey] | None = None,
+    ) -> MutableFileNode:
+        """
+        Create *and upload* a new mutable object.
+
+        :param contents: If given, the initial contents for the new object.
+
+        :param version: If given, the mutable file format for the new object
+            (otherwise a format will be chosen automatically).
+
+        :param unique_keypair: **Warning** This value independently determines
+            the identity of the mutable object to create.  There cannot be two
+            different mutable objects that share a keypair.  They will merge
+            into one object (with undefined contents).
+
+            It is common to pass a None value (or not pass a value) for this
+            parameter.  In these cases, a new random keypair will be
+            generated.
+
+            If non-None, the given public/private keypair will be used for the
+            new object.  The expected use-case is for implementing compliance
+            tests.
+
+        :return: A Deferred which will fire with a representation of the new
+            mutable object after it has been uploaded.
+        """
         return self.nodemaker.create_mutable_file(contents,
-                                                  version=version)
+                                                  version=version,
+                                                  keypair=unique_keypair)

     def upload(self, uploadable, reactor=None):
         uploader = self.getServiceNamed("uploader")
diff --git a/src/allmydata/crypto/rsa.py b/src/allmydata/crypto/rsa.py
index 95cf01413..e579a3d2a 100644
--- a/src/allmydata/crypto/rsa.py
+++ b/src/allmydata/crypto/rsa.py
@@ -9,17 +9,14 @@ features of any objects that `cryptography` documents.

 That is, the public and private keys are opaque objects; DO NOT depend
 on any of their methods.
-
-Ported to Python 3.
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from future.utils import PY2 -if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations + +from typing_extensions import TypeAlias +from typing import Callable + +from functools import partial from cryptography.exceptions import InvalidSignature from cryptography.hazmat.backends import default_backend @@ -30,6 +27,8 @@ from cryptography.hazmat.primitives.serialization import load_der_private_key, l from allmydata.crypto.error import BadSignature +PublicKey: TypeAlias = rsa.RSAPublicKey +PrivateKey: TypeAlias = rsa.RSAPrivateKey # This is the value that was used by `pycryptopp`, and we must continue to use it for # both backwards compatibility and interoperability. @@ -46,12 +45,12 @@ RSA_PADDING = padding.PSS( -def create_signing_keypair(key_size): +def create_signing_keypair(key_size: int) -> tuple[PrivateKey, PublicKey]: """ Create a new RSA signing (private) keypair from scratch. Can be used with `sign_data` function. - :param int key_size: length of key in bits + :param key_size: length of key in bits :returns: 2-tuple of (private_key, public_key) """ @@ -63,32 +62,62 @@ def create_signing_keypair(key_size): return priv_key, priv_key.public_key() -def create_signing_keypair_from_string(private_key_der): +def create_signing_keypair_from_string(private_key_der: bytes) -> tuple[PrivateKey, PublicKey]: """ Create an RSA signing (private) key from previously serialized private key bytes. - :param bytes private_key_der: blob as returned from `der_string_from_signing_keypair` + :param private_key_der: blob as returned from `der_string_from_signing_keypair` :returns: 2-tuple of (private_key, public_key) """ - priv_key = load_der_private_key( + _load = partial( + load_der_private_key, private_key_der, password=None, backend=default_backend(), ) - if not isinstance(priv_key, rsa.RSAPrivateKey): + + def load_with_validation() -> PrivateKey: + k = _load() + assert isinstance(k, PrivateKey) + return k + + def load_without_validation() -> PrivateKey: + k = _load(unsafe_skip_rsa_key_validation=True) + assert isinstance(k, PrivateKey) + return k + + # Load it once without the potentially expensive OpenSSL validation + # checks. These have superlinear complexity. We *will* run them just + # below - but first we'll apply our own constant-time checks. + load: Callable[[], PrivateKey] = load_without_validation + try: + unsafe_priv_key = load() + except TypeError: + # cryptography<39 does not support this parameter, so just load the + # key with validation... + unsafe_priv_key = load_with_validation() + # But avoid *reloading* it since that will run the expensive + # validation *again*. + load = lambda: unsafe_priv_key + + if not isinstance(unsafe_priv_key, rsa.RSAPrivateKey): raise ValueError( "Private Key did not decode to an RSA key" ) - if priv_key.key_size != 2048: + if unsafe_priv_key.key_size != 2048: raise ValueError( "Private Key must be 2048 bits" ) - return priv_key, priv_key.public_key() + + # Now re-load it with OpenSSL's validation applied. 
+ safe_priv_key = load() + + return safe_priv_key, safe_priv_key.public_key() -def der_string_from_signing_key(private_key): +def der_string_from_signing_key(private_key: PrivateKey) -> bytes: """ Serializes a given RSA private key to a DER string @@ -98,14 +127,14 @@ def der_string_from_signing_key(private_key): :returns: bytes representing `private_key` """ _validate_private_key(private_key) - return private_key.private_bytes( + return private_key.private_bytes( # type: ignore[attr-defined] encoding=Encoding.DER, format=PrivateFormat.PKCS8, encryption_algorithm=NoEncryption(), ) -def der_string_from_verifying_key(public_key): +def der_string_from_verifying_key(public_key: PublicKey) -> bytes: """ Serializes a given RSA public key to a DER string. @@ -121,7 +150,7 @@ def der_string_from_verifying_key(public_key): ) -def create_verifying_key_from_string(public_key_der): +def create_verifying_key_from_string(public_key_der: bytes) -> PublicKey: """ Create an RSA verifying key from a previously serialized public key @@ -134,15 +163,16 @@ def create_verifying_key_from_string(public_key_der): public_key_der, backend=default_backend(), ) + assert isinstance(pub_key, PublicKey) return pub_key -def sign_data(private_key, data): +def sign_data(private_key: PrivateKey, data: bytes) -> bytes: """ :param private_key: the private part of a keypair returned from `create_signing_keypair_from_string` or `create_signing_keypair` - :param bytes data: the bytes to sign + :param data: the bytes to sign :returns: bytes which are a signature of the bytes given as `data`. """ @@ -153,7 +183,7 @@ def sign_data(private_key, data): hashes.SHA256(), ) -def verify_signature(public_key, alleged_signature, data): +def verify_signature(public_key: PublicKey, alleged_signature: bytes, data: bytes) -> None: """ :param public_key: a verifying key, returned from `create_verifying_key_from_string` or `create_verifying_key_from_private_key` @@ -173,23 +203,23 @@ def verify_signature(public_key, alleged_signature, data): raise BadSignature() -def _validate_public_key(public_key): +def _validate_public_key(public_key: PublicKey) -> None: """ Internal helper. Checks that `public_key` is a valid cryptography object """ if not isinstance(public_key, rsa.RSAPublicKey): raise ValueError( - "public_key must be an RSAPublicKey" + f"public_key must be an RSAPublicKey not {type(public_key)}" ) -def _validate_private_key(private_key): +def _validate_private_key(private_key: PrivateKey) -> None: """ Internal helper. Checks that `public_key` is a valid cryptography object """ if not isinstance(private_key, rsa.RSAPrivateKey): raise ValueError( - "private_key must be an RSAPrivateKey" + f"private_key must be an RSAPrivateKey not {type(private_key)}" ) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 874492785..2414527ff 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -262,6 +262,8 @@ class Encoder(object): d.addCallback(lambda res: self.finish_hashing()) + # These calls have to happen in order; layout.py now requires writes to + # be appended to the data written so far. d.addCallback(lambda res: self.send_crypttext_hash_tree_to_all_shareholders()) d.addCallback(lambda res: self.send_all_block_hash_trees()) diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py index d552d43c4..9154f2f30 100644 --- a/src/allmydata/immutable/layout.py +++ b/src/allmydata/immutable/layout.py @@ -1,21 +1,18 @@ """ Ported to Python 3. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations import struct +from io import BytesIO + +from attrs import define, field from zope.interface import implementer from twisted.internet import defer from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ FileTooLargeError, HASH_SIZE -from allmydata.util import mathutil, observer, pipeline, log +from allmydata.util import mathutil, observer, log from allmydata.util.assertutil import precondition from allmydata.storage.server import si_b2a @@ -107,19 +104,58 @@ def make_write_bucket_proxy(rref, server, num_share_hashes, uri_extension_size) return wbp + +@define +class _WriteBuffer: + """ + Queue up small writes to be written in a single batched larger write. + """ + _batch_size: int + _to_write : BytesIO = field(factory=BytesIO) + _written_bytes : int = field(default=0) + + def queue_write(self, data: bytes) -> bool: + """ + Queue a write. If the result is ``False``, no further action is needed + for now. If the result is some ``True``, it's time to call ``flush()`` + and do a real write. + """ + self._to_write.write(data) + return self.get_queued_bytes() >= self._batch_size + + def flush(self) -> tuple[int, bytes]: + """Return offset and data to be written.""" + offset = self._written_bytes + data = self._to_write.getvalue() + self._written_bytes += len(data) + self._to_write = BytesIO() + return (offset, data) + + def get_queued_bytes(self) -> int: + """Return number of queued, unwritten bytes.""" + return self._to_write.tell() + + def get_total_bytes(self) -> int: + """Return how many bytes were written or queued in total.""" + return self._written_bytes + self.get_queued_bytes() + + @implementer(IStorageBucketWriter) class WriteBucketProxy(object): + """ + Note: The various ``put_`` methods need to be called in the order in which the + bytes will get written. + """ fieldsize = 4 fieldstruct = ">L" def __init__(self, rref, server, data_size, block_size, num_segments, - num_share_hashes, uri_extension_size, pipeline_size=50000): + num_share_hashes, uri_extension_size, batch_size=1_000_000): self._rref = rref self._server = server self._data_size = data_size self._block_size = block_size self._num_segments = num_segments - self._written_bytes = 0 effective_segments = mathutil.next_power_of_k(num_segments,2) self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE @@ -130,11 +166,13 @@ class WriteBucketProxy(object): self._create_offsets(block_size, data_size) - # k=3, max_segment_size=128KiB gives us a typical segment of 43691 - # bytes. Setting the default pipeline_size to 50KB lets us get two - # segments onto the wire but not a third, which would keep the pipe - # filled. - self._pipeline = pipeline.Pipeline(pipeline_size) + # With a ~1MB batch size, max upload speed is 1MB/(round-trip latency) + # assuming the writing code waits for writes to finish, so 20MB/sec if + # latency is 50ms. In the US many people only have 1MB/sec upload speed + # as of 2022 (standard Comcast). For further discussion of how one + # might set batch sizes see + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3787#comment:1. 
+        self._write_buffer = _WriteBuffer(batch_size)

     def get_allocated_size(self):
         return (self._offsets['uri_extension'] + self.fieldsize +
@@ -179,7 +217,7 @@
         return "<WriteBucketProxy for node %r>" % self._server.get_name()

     def put_header(self):
-        return self._write(0, self._offset_data)
+        return self._queue_write(0, self._offset_data)

     def put_block(self, segmentnum, data):
         offset = self._offsets['data'] + segmentnum * self._block_size
@@ -193,13 +231,13 @@
                           (self._block_size *
                            (self._num_segments - 1))),
                          len(data), self._block_size)
-        return self._write(offset, data)
+        return self._queue_write(offset, data)

     def put_crypttext_hashes(self, hashes):
         # plaintext_hash_tree precedes crypttext_hash_tree. It is not used, and
         # so is not explicitly written, but we need to write everything, so
         # fill it in with nulls.
-        d = self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size)
+        d = self._queue_write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size)
         d.addCallback(lambda _: self._really_put_crypttext_hashes(hashes))
         return d

@@ -212,7 +250,7 @@
         precondition(offset + len(data) <= self._offsets['block_hashes'],
                      offset, len(data), offset+len(data),
                      self._offsets['block_hashes'])
-        return self._write(offset, data)
+        return self._queue_write(offset, data)

     def put_block_hashes(self, blockhashes):
         offset = self._offsets['block_hashes']
@@ -223,7 +261,7 @@
         precondition(offset + len(data) <= self._offsets['share_hashes'],
                      offset, len(data), offset+len(data),
                      self._offsets['share_hashes'])
-        return self._write(offset, data)
+        return self._queue_write(offset, data)

     def put_share_hashes(self, sharehashes):
         # sharehashes is a list of (index, hash) tuples, so they get stored
@@ -237,29 +275,45 @@
         precondition(offset + len(data) <= self._offsets['uri_extension'],
                      offset, len(data), offset+len(data),
                      self._offsets['uri_extension'])
-        return self._write(offset, data)
+        return self._queue_write(offset, data)

     def put_uri_extension(self, data):
         offset = self._offsets['uri_extension']
         assert isinstance(data, bytes)
         precondition(len(data) == self._uri_extension_size)
         length = struct.pack(self.fieldstruct, len(data))
-        return self._write(offset, length+data)
+        return self._queue_write(offset, length+data)

-    def _write(self, offset, data):
-        # use a Pipeline to pipeline several writes together. TODO: another
-        # speedup would be to coalesce small writes into a single call: this
-        # would reduce the foolscap CPU overhead per share, but wouldn't
-        # reduce the number of round trips, so it might not be worth the
-        # effort.
-        self._written_bytes += len(data)
-        return self._pipeline.add(len(data),
-                                  self._rref.callRemote, "write", offset, data)
+    def _queue_write(self, offset, data):
+        """
+        This queues up small writes to be written in a single batched larger
+        write.
+
+        Callers of this function are expected to queue the data in order, with
+        no holes.  As such, the offset is technically unnecessary, but is used
+        to check the inputs.  Possibly we should get rid of it.
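+
+        For example, with ``batch_size=4``, ``_queue_write(0, b"ab")`` only
+        buffers the bytes, while a following ``_queue_write(2, b"cd")``
+        reaches the batch size and issues a single remote
+        ``write(0, b"abcd")`` call.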
+ """ + assert offset == self._write_buffer.get_total_bytes() + if self._write_buffer.queue_write(data): + return self._actually_write() + else: + return defer.succeed(False) + + def _actually_write(self): + """Write data to the server.""" + offset, data = self._write_buffer.flush() + return self._rref.callRemote("write", offset, data) def close(self): - assert self._written_bytes == self.get_allocated_size(), f"{self._written_bytes} != {self.get_allocated_size()}" - d = self._pipeline.add(0, self._rref.callRemote, "close") - d.addCallback(lambda ign: self._pipeline.flush()) + assert self._write_buffer.get_total_bytes() == self.get_allocated_size(), ( + f"{self._written_buffer.get_total_bytes_queued()} != {self.get_allocated_size()}" + ) + if self._write_buffer.get_queued_bytes() > 0: + d = self._actually_write() + else: + # No data queued, don't send empty string write. + d = defer.succeed(True) + d.addCallback(lambda _: self._rref.callRemote("close")) return d def abort(self): @@ -371,16 +425,16 @@ class ReadBucketProxy(object): self._fieldsize = fieldsize self._fieldstruct = fieldstruct - for field in ( 'data', - 'plaintext_hash_tree', # UNUSED - 'crypttext_hash_tree', - 'block_hashes', - 'share_hashes', - 'uri_extension', - ): + for field_name in ( 'data', + 'plaintext_hash_tree', # UNUSED + 'crypttext_hash_tree', + 'block_hashes', + 'share_hashes', + 'uri_extension', + ): offset = struct.unpack(fieldstruct, data[x:x+fieldsize])[0] x += fieldsize - self._offsets[field] = offset + self._offsets[field_name] = offset return self._offsets def _get_block_data(self, unused, blocknum, blocksize, thisblocksize): diff --git a/src/allmydata/mutable/common.py b/src/allmydata/mutable/common.py index 87951c7b2..a498ab02a 100644 --- a/src/allmydata/mutable/common.py +++ b/src/allmydata/mutable/common.py @@ -1,14 +1,7 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations MODE_CHECK = "MODE_CHECK" # query all peers MODE_ANYTHING = "MODE_ANYTHING" # one recoverable version @@ -17,6 +10,9 @@ MODE_WRITE = "MODE_WRITE" # replace all shares, probably.. not for initial MODE_READ = "MODE_READ" MODE_REPAIR = "MODE_REPAIR" # query all peers, get the privkey +from allmydata.crypto import aes, rsa +from allmydata.util import hashutil + class NotWriteableError(Exception): pass @@ -68,3 +64,33 @@ class CorruptShareError(BadShareError): class UnknownVersionError(BadShareError): """The share we received was of a version we don't recognize.""" + + +def encrypt_privkey(writekey: bytes, privkey: bytes) -> bytes: + """ + For SSK, encrypt a private ("signature") key using the writekey. + """ + encryptor = aes.create_encryptor(writekey) + crypttext = aes.encrypt_data(encryptor, privkey) + return crypttext + +def decrypt_privkey(writekey: bytes, enc_privkey: bytes) -> rsa.PrivateKey: + """ + The inverse of ``encrypt_privkey``. 
+ """ + decryptor = aes.create_decryptor(writekey) + privkey = aes.decrypt_data(decryptor, enc_privkey) + return privkey + +def derive_mutable_keys(keypair: tuple[rsa.PublicKey, rsa.PrivateKey]) -> tuple[bytes, bytes, bytes]: + """ + Derive the SSK writekey, encrypted writekey, and fingerprint from the + public/private ("verification" / "signature") keypair. + """ + pubkey, privkey = keypair + pubkey_s = rsa.der_string_from_verifying_key(pubkey) + privkey_s = rsa.der_string_from_signing_key(privkey) + writekey = hashutil.ssk_writekey_hash(privkey_s) + encprivkey = encrypt_privkey(writekey, privkey_s) + fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey_s) + return writekey, encprivkey, fingerprint diff --git a/src/allmydata/mutable/filenode.py b/src/allmydata/mutable/filenode.py index cd8cb0dc7..00b31c52b 100644 --- a/src/allmydata/mutable/filenode.py +++ b/src/allmydata/mutable/filenode.py @@ -1,14 +1,7 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations import random @@ -16,8 +9,6 @@ from zope.interface import implementer from twisted.internet import defer, reactor from foolscap.api import eventually -from allmydata.crypto import aes -from allmydata.crypto import rsa from allmydata.interfaces import IMutableFileNode, ICheckable, ICheckResults, \ NotEnoughSharesError, MDMF_VERSION, SDMF_VERSION, IMutableUploadable, \ IMutableFileVersion, IWriteable @@ -28,8 +19,14 @@ from allmydata.uri import WriteableSSKFileURI, ReadonlySSKFileURI, \ from allmydata.monitor import Monitor from allmydata.mutable.publish import Publish, MutableData,\ TransformingUploadable -from allmydata.mutable.common import MODE_READ, MODE_WRITE, MODE_CHECK, UnrecoverableFileError, \ - UncoordinatedWriteError +from allmydata.mutable.common import ( + MODE_READ, + MODE_WRITE, + MODE_CHECK, + UnrecoverableFileError, + UncoordinatedWriteError, + derive_mutable_keys, +) from allmydata.mutable.servermap import ServerMap, ServermapUpdater from allmydata.mutable.retrieve import Retrieve from allmydata.mutable.checker import MutableChecker, MutableCheckAndRepairer @@ -139,13 +136,10 @@ class MutableFileNode(object): Deferred that fires (with the MutableFileNode instance you should use) when it completes. 
""" - (pubkey, privkey) = keypair - self._pubkey, self._privkey = pubkey, privkey - pubkey_s = rsa.der_string_from_verifying_key(self._pubkey) - privkey_s = rsa.der_string_from_signing_key(self._privkey) - self._writekey = hashutil.ssk_writekey_hash(privkey_s) - self._encprivkey = self._encrypt_privkey(self._writekey, privkey_s) - self._fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey_s) + self._pubkey, self._privkey = keypair + self._writekey, self._encprivkey, self._fingerprint = derive_mutable_keys( + keypair, + ) if version == MDMF_VERSION: self._uri = WriteableMDMFFileURI(self._writekey, self._fingerprint) self._protocol_version = version @@ -171,16 +165,6 @@ class MutableFileNode(object): (contents, type(contents)) return contents(self) - def _encrypt_privkey(self, writekey, privkey): - encryptor = aes.create_encryptor(writekey) - crypttext = aes.encrypt_data(encryptor, privkey) - return crypttext - - def _decrypt_privkey(self, enc_privkey): - decryptor = aes.create_decryptor(self._writekey) - privkey = aes.decrypt_data(decryptor, enc_privkey) - return privkey - def _populate_pubkey(self, pubkey): self._pubkey = pubkey def _populate_required_shares(self, required_shares): diff --git a/src/allmydata/mutable/retrieve.py b/src/allmydata/mutable/retrieve.py index 32aaa72e5..64573a49a 100644 --- a/src/allmydata/mutable/retrieve.py +++ b/src/allmydata/mutable/retrieve.py @@ -1,15 +1,7 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - # Don't import bytes and str, to prevent API leakage - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, max, min # noqa: F401 +from __future__ import annotations import time @@ -32,7 +24,7 @@ from allmydata import hashtree, codec from allmydata.storage.server import si_b2a from allmydata.mutable.common import CorruptShareError, BadShareError, \ - UncoordinatedWriteError + UncoordinatedWriteError, decrypt_privkey from allmydata.mutable.layout import MDMFSlotReadProxy @implementer(IRetrieveStatus) @@ -931,9 +923,10 @@ class Retrieve(object): def _try_to_validate_privkey(self, enc_privkey, reader, server): - alleged_privkey_s = self._node._decrypt_privkey(enc_privkey) + node_writekey = self._node.get_writekey() + alleged_privkey_s = decrypt_privkey(node_writekey, enc_privkey) alleged_writekey = hashutil.ssk_writekey_hash(alleged_privkey_s) - if alleged_writekey != self._node.get_writekey(): + if alleged_writekey != node_writekey: self.log("invalid privkey from %s shnum %d" % (reader, reader.shnum), level=log.WEIRD, umid="YIw4tA") diff --git a/src/allmydata/mutable/servermap.py b/src/allmydata/mutable/servermap.py index 211b1fc16..99aa85d24 100644 --- a/src/allmydata/mutable/servermap.py +++ b/src/allmydata/mutable/servermap.py @@ -1,16 +1,8 @@ """ Ported to Python 3. 
""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import annotations -from future.utils import PY2 -if PY2: - # Doesn't import str to prevent API leakage on Python 2 - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 -from past.builtins import unicode from six import ensure_str import sys, time, copy @@ -29,7 +21,7 @@ from allmydata.storage.server import si_b2a from allmydata.interfaces import IServermapUpdaterStatus from allmydata.mutable.common import MODE_CHECK, MODE_ANYTHING, MODE_WRITE, \ - MODE_READ, MODE_REPAIR, CorruptShareError + MODE_READ, MODE_REPAIR, CorruptShareError, decrypt_privkey from allmydata.mutable.layout import SIGNED_PREFIX_LENGTH, MDMFSlotReadProxy @implementer(IServermapUpdaterStatus) @@ -203,8 +195,8 @@ class ServerMap(object): (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo print("[%s]: sh#%d seq%d-%s %d-of-%d len%d" % - (unicode(server.get_name(), "utf-8"), shnum, - seqnum, unicode(base32.b2a(root_hash)[:4], "utf-8"), k, N, + (str(server.get_name(), "utf-8"), shnum, + seqnum, str(base32.b2a(root_hash)[:4], "utf-8"), k, N, datalength), file=out) if self._problems: print("%d PROBLEMS" % len(self._problems), file=out) @@ -276,7 +268,7 @@ class ServerMap(object): """Take a versionid, return a string that describes it.""" (seqnum, root_hash, IV, segsize, datalength, k, N, prefix, offsets_tuple) = verinfo - return "seq%d-%s" % (seqnum, unicode(base32.b2a(root_hash)[:4], "utf-8")) + return "seq%d-%s" % (seqnum, str(base32.b2a(root_hash)[:4], "utf-8")) def summarize_versions(self): """Return a string describing which versions we know about.""" @@ -824,7 +816,7 @@ class ServermapUpdater(object): def notify_server_corruption(self, server, shnum, reason): - if isinstance(reason, unicode): + if isinstance(reason, str): reason = reason.encode("utf-8") ss = server.get_storage_server() ss.advise_corrupt_share( @@ -879,7 +871,7 @@ class ServermapUpdater(object): # ok, it's a valid verinfo. Add it to the list of validated # versions. self.log(" found valid version %d-%s from %s-sh%d: %d-%d/%d/%d" - % (seqnum, unicode(base32.b2a(root_hash)[:4], "utf-8"), + % (seqnum, str(base32.b2a(root_hash)[:4], "utf-8"), ensure_str(server.get_name()), shnum, k, n, segsize, datalen), parent=lp) @@ -951,9 +943,10 @@ class ServermapUpdater(object): writekey stored in my node. If it is valid, then I set the privkey and encprivkey properties of the node. """ - alleged_privkey_s = self._node._decrypt_privkey(enc_privkey) + node_writekey = self._node.get_writekey() + alleged_privkey_s = decrypt_privkey(node_writekey, enc_privkey) alleged_writekey = hashutil.ssk_writekey_hash(alleged_privkey_s) - if alleged_writekey != self._node.get_writekey(): + if alleged_writekey != node_writekey: self.log("invalid privkey from %r shnum %d" % (server.get_name(), shnum), parent=lp, level=log.WEIRD, umid="aJVccw") diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py index 23ba4b451..1b7ea5f45 100644 --- a/src/allmydata/nodemaker.py +++ b/src/allmydata/nodemaker.py @@ -1,17 +1,12 @@ """ -Ported to Python 3. +Create file nodes of various types. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations import weakref from zope.interface import implementer +from twisted.internet.defer import succeed from allmydata.util.assertutil import precondition from allmydata.interfaces import INodeMaker from allmydata.immutable.literal import LiteralFileNode @@ -22,6 +17,7 @@ from allmydata.mutable.publish import MutableData from allmydata.dirnode import DirectoryNode, pack_children from allmydata.unknown import UnknownNode from allmydata.blacklist import ProhibitedNode +from allmydata.crypto.rsa import PublicKey, PrivateKey from allmydata import uri @@ -126,12 +122,15 @@ class NodeMaker(object): return self._create_dirnode(filenode) return None - def create_mutable_file(self, contents=None, version=None): + def create_mutable_file(self, contents=None, version=None, keypair: tuple[PublicKey, PrivateKey] | None = None): if version is None: version = self.mutable_file_default n = MutableFileNode(self.storage_broker, self.secret_holder, self.default_encoding_parameters, self.history) - d = self.key_generator.generate() + if keypair is None: + d = self.key_generator.generate() + else: + d = succeed(keypair) d.addCallback(n.create_with_keys, contents, version=version) d.addCallback(lambda res: n) return d diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 55975b8c5..579b37906 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -180,10 +180,22 @@ class GetOptions(FileStoreOptions): class PutOptions(FileStoreOptions): optFlags = [ ("mutable", "m", "Create a mutable file instead of an immutable one (like --format=SDMF)"), - ] + ] + optParameters = [ ("format", None, None, "Create a file with the given format: SDMF and MDMF for mutable, CHK (default) for immutable. (case-insensitive)"), - ] + + ("private-key-path", None, None, + "***Warning*** " + "It is possible to use this option to spoil the normal security properties of mutable objects. " + "It is also possible to corrupt or destroy data with this option. " + "Most users will not need this option and can ignore it. " + "For mutables only, " + "this gives a file containing a PEM-encoded 2048 bit RSA private key to use as the signature key for the mutable. " + "The private key must be handled at least as strictly as the resulting capability string. " + "A single private key must not be used for more than one mutable." + ), + ] def parseArgs(self, arg1=None, arg2=None): # see Examples below diff --git a/src/allmydata/scripts/tahoe_put.py b/src/allmydata/scripts/tahoe_put.py index 1ea45e8ea..c04b6b4bc 100644 --- a/src/allmydata/scripts/tahoe_put.py +++ b/src/allmydata/scripts/tahoe_put.py @@ -1,23 +1,32 @@ """ -Ported to Python 3. +Implement the ``tahoe put`` command. 
""" -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations from io import BytesIO from urllib.parse import quote as url_quote +from base64 import urlsafe_b64encode +from cryptography.hazmat.primitives.serialization import load_pem_private_key + +from twisted.python.filepath import FilePath + +from allmydata.crypto.rsa import PrivateKey, der_string_from_signing_key from allmydata.scripts.common_http import do_http, format_http_success, format_http_error from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.util.encodingutil import quote_output +def load_private_key(path: str) -> str: + """ + Load a private key from a file and return it in a format appropriate + to include in the HTTP request. + """ + privkey = load_pem_private_key(FilePath(path).getContent(), password=None) + assert isinstance(privkey, PrivateKey) + derbytes = der_string_from_signing_key(privkey) + return urlsafe_b64encode(derbytes).decode("ascii") + def put(options): """ @param verbosity: 0, 1, or 2, meaning quiet, verbose, or very verbose @@ -29,6 +38,10 @@ def put(options): from_file = options.from_file to_file = options.to_file mutable = options['mutable'] + if options["private-key-path"] is None: + private_key = None + else: + private_key = load_private_key(options["private-key-path"]) format = options['format'] if options['quiet']: verbosity = 0 @@ -79,6 +92,12 @@ def put(options): queryargs = [] if mutable: queryargs.append("mutable=true") + if private_key is not None: + queryargs.append(f"private-key={private_key}") + else: + if private_key is not None: + raise Exception("Can only supply a private key for mutables.") + if format: queryargs.append("format=%s" % format) if queryargs: @@ -92,10 +111,7 @@ def put(options): if verbosity > 0: print("waiting for file data on stdin..", file=stderr) # We're uploading arbitrary files, so this had better be bytes: - if PY2: - stdinb = stdin - else: - stdinb = stdin.buffer + stdinb = stdin.buffer data = stdinb.read() infileobj = BytesIO(data) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 79bf061c9..90bda7fc0 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -323,6 +323,7 @@ class StorageClient(object): swissnum = nurl.path[0].encode("ascii") certificate_hash = nurl.user.encode("ascii") pool = HTTPConnectionPool(reactor) + pool.maxPersistentPerHost = 20 if cls.TEST_MODE_REGISTER_HTTP_POOL is not None: cls.TEST_MODE_REGISTER_HTTP_POOL(pool) diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 3902976ba..387353d24 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -9,6 +9,9 @@ from functools import wraps from base64 import b64decode import binascii from tempfile import TemporaryFile +from os import SEEK_END, SEEK_SET +import mmap +from importlib.metadata import version as get_package_version, PackageNotFoundError from cryptography.x509 import Certificate as CryptoCertificate from zope.interface import implementer @@ -39,7 +42,7 @@ from cryptography.x509 import load_pem_x509_certificate # TODO Make sure to use 
pure Python versions?
-from cbor2 import dump, loads
+import cbor2
 from pycddl import Schema, ValidationError as CDDLValidationError
 from .server import StorageServer
 from .http_common import (
@@ -57,6 +60,20 @@ from ..util.base32 import rfc3548_alphabet
 from allmydata.interfaces import BadWriteEnablerError
+# Until we figure out Nix (https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3963),
+# we need to support old pycddl, which can only take bytes:
+from distutils.version import LooseVersion
+
+try:
+    PYCDDL_BYTES_ONLY = LooseVersion(get_package_version("pycddl")) < LooseVersion(
+        "0.4"
+    )
+except PackageNotFoundError:
+    # This can happen when building a PyInstaller distribution. We'll just
+    # assume you installed a modern pycddl, because why wouldn't you?
+    PYCDDL_BYTES_ONLY = False
+
+
 class ClientSecretsException(Exception):
     """The client did not send the appropriate secrets."""
@@ -100,7 +117,7 @@ def _authorization_decorator(required_secrets):
     @wraps(f)
     def route(self, request, *args, **kwargs):
         if not timing_safe_compare(
-            request.requestHeaders.getRawHeaders("Authorization", [None])[0].encode(
+            request.requestHeaders.getRawHeaders("Authorization", [""])[0].encode(
                 "utf-8"
             ),
             swissnum_auth_header(self._swissnum),
@@ -278,7 +295,7 @@ _SCHEMAS = {
     "test-write-vectors": {
         0*256 share_number : {
             "test": [0*30 {"offset": uint, "size": uint, "specimen": bstr}]
-            "write": [0*30 {"offset": uint, "data": bstr}]
+            "write": [* {"offset": uint, "data": bstr}]
             "new-length": uint / null
         }
     }
@@ -515,7 +532,7 @@ class HTTPServer(object):
         if accept.best == CBOR_MIME_TYPE:
             request.setHeader("Content-Type", CBOR_MIME_TYPE)
             f = TemporaryFile()
-            dump(data, f)
+            cbor2.dump(data, f)

             def read_data(offset: int, length: int) -> bytes:
                 f.seek(offset)
@@ -527,27 +544,47 @@ class HTTPServer(object):
             # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3861
             raise _HTTPError(http.NOT_ACCEPTABLE)

-    def _read_encoded(self, request, schema: Schema) -> Any:
+    def _read_encoded(
+        self, request, schema: Schema, max_size: int = 1024 * 1024
+    ) -> Any:
         """
         Read encoded request body data, decoding it with CBOR by default.

-        Somewhat arbitrarily, limit body size to 1MB; this may be too low, we
-        may want to customize per query type, but this is the starting point
-        for now.
+        Somewhat arbitrarily, limit body size to 1MiB by default.
         """
         content_type = get_content_type(request.requestHeaders)
-        if content_type == CBOR_MIME_TYPE:
-            # Read 1 byte more than 1MB. We expect length to be 1MB or
-            # less; if it's more assume it's not a legitimate message.
-            message = request.content.read(1024 * 1024 + 1)
-            if len(message) > 1024 * 1024:
-                raise _HTTPError(http.REQUEST_ENTITY_TOO_LARGE)
-            schema.validate_cbor(message)
-            result = loads(message)
-            return result
-        else:
+        if content_type != CBOR_MIME_TYPE:
             raise _HTTPError(http.UNSUPPORTED_MEDIA_TYPE)

+        # Make sure it's not too large:
+        request.content.seek(0, SEEK_END)
+        if request.content.tell() > max_size:
+            raise _HTTPError(http.REQUEST_ENTITY_TOO_LARGE)
+        request.content.seek(0, SEEK_SET)
+
+        # We don't want to load the whole message into memory, because it
+        # might be quite large. The CDDL validator takes a read-only
+        # bytes-like thing. Luckily, for large request bodies twisted.web
+        # will buffer the data in a file, so we can use mmap() to get a
+        # memory view. The CDDL validator will not make a copy, so it won't
+        # increase memory usage beyond that.
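For orientation, the strategy described in the comment above looks like this in isolation. This is a sketch, not the patch's API: `bounded_view` is a hypothetical helper, and it assumes a seekable file-like `body` of the kind twisted.web hands to resources (an in-memory buffer for small requests, a temporary file for large ones).

```python
from __future__ import annotations

import mmap
from os import SEEK_END, SEEK_SET


def bounded_view(body, max_size: int) -> bytes | mmap.mmap:
    """Return a read-only bytes-like view of ``body``, refusing oversized input."""
    # Measure the body without reading it: seek to the end, ask where we are.
    body.seek(0, SEEK_END)
    size = body.tell()
    body.seek(0, SEEK_SET)
    if size > max_size:
        raise ValueError(f"body is {size} bytes; the limit is {max_size}")
    try:
        fd = body.fileno()
    except (ValueError, OSError):
        # io.BytesIO raises io.UnsupportedOperation, a subclass of both.
        fd = -1
    if fd >= 0 and size > 0:
        # File-backed: map the whole file (length 0 means "to the end")
        # read-only instead of copying it into memory.
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    # In-memory or empty body: just read the bytes.
    return body.read()
```

The patch applies the same idea below, with one extra wrinkle: the mmap() path is skipped when `PYCDDL_BYTES_ONLY` is set, because pycddl releases before 0.4 only accept `bytes`.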
+        try:
+            fd = request.content.fileno()
+        except (ValueError, OSError):
+            fd = -1
+        if fd >= 0 and not PYCDDL_BYTES_ONLY:
+            # It's a file, so we can use mmap() to save memory.
+            message = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+        else:
+            message = request.content.read()
+        schema.validate_cbor(message)
+
+        # The CBOR parser will allocate more memory, but at least we can feed
+        # it the file-like object, so that if it's large it won't make two
+        # copies.
+        request.content.seek(0, SEEK_SET)
+        return cbor2.load(request.content)
+
     ##### Generic APIs #####

     @_authorized_route(_app, set(), "/storage/v1/version", methods=["GET"])
@@ -746,7 +783,9 @@ class HTTPServer(object):
     )
     def mutable_read_test_write(self, request, authorization, storage_index):
         """Read/test/write combined operation for mutables."""
-        rtw_request = self._read_encoded(request, _SCHEMAS["mutable_read_test_write"])
+        rtw_request = self._read_encoded(
+            request, _SCHEMAS["mutable_read_test_write"], max_size=2**48
+        )
         secrets = (
             authorization[Secrets.WRITE_ENABLER],
             authorization[Secrets.LEASE_RENEW],
diff --git a/src/allmydata/test/_win_subprocess.py b/src/allmydata/test/_win_subprocess.py
deleted file mode 100644
index bf9767e73..000000000
--- a/src/allmydata/test/_win_subprocess.py
+++ /dev/null
@@ -1,210 +0,0 @@
-"""
-This module is only necessary on Python 2. Once Python 2 code is dropped, it
-can be deleted.
-"""
-
-from future.utils import PY3
-if PY3:
-    raise RuntimeError("Just use subprocess.Popen")
-
-# This is necessary to pacify flake8 on Python 3, while we're still supporting
-# Python 2.
-from past.builtins import unicode
-
-# -*- coding: utf-8 -*-
-
-## Copyright (C) 2021 Valentin Lab
-##
-## Redistribution and use in source and binary forms, with or without
-## modification, are permitted provided that the following conditions
-## are met:
-##
-## 1. Redistributions of source code must retain the above copyright
-##    notice, this list of conditions and the following disclaimer.
-##
-## 2. Redistributions in binary form must reproduce the above
-##    copyright notice, this list of conditions and the following
-##    disclaimer in the documentation and/or other materials provided
-##    with the distribution.
-##
-## 3. Neither the name of the copyright holder nor the names of its
-##    contributors may be used to endorse or promote products derived
-##    from this software without specific prior written permission.
-##
-## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-## FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-## COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
-## INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-## (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-## SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-## OF THE POSSIBILITY OF SUCH DAMAGE.
-## - -## issue: https://bugs.python.org/issue19264 - -# See allmydata/windows/fixups.py -import sys -assert sys.platform == "win32" - -import os -import ctypes -import subprocess -import _subprocess -from ctypes import byref, windll, c_char_p, c_wchar_p, c_void_p, \ - Structure, sizeof, c_wchar, WinError -from ctypes.wintypes import BYTE, WORD, LPWSTR, BOOL, DWORD, LPVOID, \ - HANDLE - - -## -## Types -## - -CREATE_UNICODE_ENVIRONMENT = 0x00000400 -LPCTSTR = c_char_p -LPTSTR = c_wchar_p -LPSECURITY_ATTRIBUTES = c_void_p -LPBYTE = ctypes.POINTER(BYTE) - -class STARTUPINFOW(Structure): - _fields_ = [ - ("cb", DWORD), ("lpReserved", LPWSTR), - ("lpDesktop", LPWSTR), ("lpTitle", LPWSTR), - ("dwX", DWORD), ("dwY", DWORD), - ("dwXSize", DWORD), ("dwYSize", DWORD), - ("dwXCountChars", DWORD), ("dwYCountChars", DWORD), - ("dwFillAtrribute", DWORD), ("dwFlags", DWORD), - ("wShowWindow", WORD), ("cbReserved2", WORD), - ("lpReserved2", LPBYTE), ("hStdInput", HANDLE), - ("hStdOutput", HANDLE), ("hStdError", HANDLE), - ] - -LPSTARTUPINFOW = ctypes.POINTER(STARTUPINFOW) - - -class PROCESS_INFORMATION(Structure): - _fields_ = [ - ("hProcess", HANDLE), ("hThread", HANDLE), - ("dwProcessId", DWORD), ("dwThreadId", DWORD), - ] - -LPPROCESS_INFORMATION = ctypes.POINTER(PROCESS_INFORMATION) - - -class DUMMY_HANDLE(ctypes.c_void_p): - - def __init__(self, *a, **kw): - super(DUMMY_HANDLE, self).__init__(*a, **kw) - self.closed = False - - def Close(self): - if not self.closed: - windll.kernel32.CloseHandle(self) - self.closed = True - - def __int__(self): - return self.value - - -CreateProcessW = windll.kernel32.CreateProcessW -CreateProcessW.argtypes = [ - LPCTSTR, LPTSTR, LPSECURITY_ATTRIBUTES, - LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, LPCTSTR, - LPSTARTUPINFOW, LPPROCESS_INFORMATION, -] -CreateProcessW.restype = BOOL - - -## -## Patched functions/classes -## - -def CreateProcess(executable, args, _p_attr, _t_attr, - inherit_handles, creation_flags, env, cwd, - startup_info): - """Create a process supporting unicode executable and args for win32 - - Python implementation of CreateProcess using CreateProcessW for Win32 - - """ - - si = STARTUPINFOW( - dwFlags=startup_info.dwFlags, - wShowWindow=startup_info.wShowWindow, - cb=sizeof(STARTUPINFOW), - ## XXXvlab: not sure of the casting here to ints. 
- hStdInput=int(startup_info.hStdInput), - hStdOutput=int(startup_info.hStdOutput), - hStdError=int(startup_info.hStdError), - ) - - wenv = None - if env is not None: - ## LPCWSTR seems to be c_wchar_p, so let's say CWSTR is c_wchar - env = (unicode("").join([ - unicode("%s=%s\0") % (k, v) - for k, v in env.items()])) + unicode("\0") - wenv = (c_wchar * len(env))() - wenv.value = env - - pi = PROCESS_INFORMATION() - creation_flags |= CREATE_UNICODE_ENVIRONMENT - - if CreateProcessW(executable, args, None, None, - inherit_handles, creation_flags, - wenv, cwd, byref(si), byref(pi)): - return (DUMMY_HANDLE(pi.hProcess), DUMMY_HANDLE(pi.hThread), - pi.dwProcessId, pi.dwThreadId) - raise WinError() - - -class Popen(subprocess.Popen): - """This superseeds Popen and corrects a bug in cPython 2.7 implem""" - - def _execute_child(self, args, executable, preexec_fn, close_fds, - cwd, env, universal_newlines, - startupinfo, creationflags, shell, to_close, - p2cread, p2cwrite, - c2pread, c2pwrite, - errread, errwrite): - """Code from part of _execute_child from Python 2.7 (9fbb65e) - - There are only 2 little changes concerning the construction of - the the final string in shell mode: we preempt the creation of - the command string when shell is True, because original function - will try to encode unicode args which we want to avoid to be able to - sending it as-is to ``CreateProcess``. - - """ - if not isinstance(args, subprocess.types.StringTypes): - args = subprocess.list2cmdline(args) - - if startupinfo is None: - startupinfo = subprocess.STARTUPINFO() - if shell: - startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW - startupinfo.wShowWindow = _subprocess.SW_HIDE - comspec = os.environ.get("COMSPEC", unicode("cmd.exe")) - args = unicode('{} /c "{}"').format(comspec, args) - if (_subprocess.GetVersion() >= 0x80000000 or - os.path.basename(comspec).lower() == "command.com"): - w9xpopen = self._find_w9xpopen() - args = unicode('"%s" %s') % (w9xpopen, args) - creationflags |= _subprocess.CREATE_NEW_CONSOLE - - cp = _subprocess.CreateProcess - _subprocess.CreateProcess = CreateProcess - try: - super(Popen, self)._execute_child( - args, executable, - preexec_fn, close_fds, cwd, env, universal_newlines, - startupinfo, creationflags, False, to_close, p2cread, - p2cwrite, c2pread, c2pwrite, errread, errwrite, - ) - finally: - _subprocess.CreateProcess = cp diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 03306ab71..c5f32a553 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,19 +1,18 @@ """ -Ported to Python 3. +Tests for the ``tahoe put`` CLI tool. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations +from typing import Callable, Awaitable, TypeVar, Any import os.path from twisted.trial import unittest from twisted.python import usage +from twisted.python.filepath import FilePath +from cryptography.hazmat.primitives.serialization import load_pem_private_key + +from allmydata.crypto.rsa import PrivateKey +from allmydata.uri import from_string from allmydata.util import fileutil from allmydata.scripts.common import get_aliases from allmydata.scripts import cli @@ -22,6 +21,9 @@ from ..common_util import skip_if_cannot_represent_filename from allmydata.util.encodingutil import get_io_encoding from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin +from allmydata.mutable.common import derive_mutable_keys + +T = TypeVar("T") class Put(GridTestMixin, CLITestMixin, unittest.TestCase): @@ -215,6 +217,65 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): return d + async def test_unlinked_mutable_specified_private_key(self) -> None: + """ + A new unlinked mutable can be created using a specified private + key. + """ + self.basedir = "cli/Put/unlinked-mutable-with-key" + await self._test_mutable_specified_key( + lambda do_cli, pempath, datapath: do_cli( + "put", "--mutable", "--private-key-path", pempath.path, + stdin=datapath.getContent(), + ), + ) + + async def test_linked_mutable_specified_private_key(self) -> None: + """ + A new linked mutable can be created using a specified private key. + """ + self.basedir = "cli/Put/linked-mutable-with-key" + await self._test_mutable_specified_key( + lambda do_cli, pempath, datapath: do_cli( + "put", "--mutable", "--private-key-path", pempath.path, datapath.path, + ), + ) + + async def _test_mutable_specified_key( + self, + run: Callable[[Any, FilePath, FilePath], Awaitable[tuple[int, bytes, bytes]]], + ) -> None: + """ + A helper for testing mutable creation. + + :param run: A function to do the creation. It is called with + ``self.do_cli`` and the path to a private key PEM file and a data + file. It returns whatever ``do_cli`` returns. + """ + self.set_up_grid(oneshare=True) + + pempath = FilePath(__file__).parent().sibling("data").child("openssl-rsa-2048.txt") + datapath = FilePath(self.basedir).child("data") + datapath.setContent(b"Hello world" * 1024) + + (rc, out, err) = await run(self.do_cli, pempath, datapath) + self.assertEqual(rc, 0, (out, err)) + cap = from_string(out.strip()) + # The capability is derived from the key we specified. + privkey = load_pem_private_key(pempath.getContent(), password=None) + assert isinstance(privkey, PrivateKey) + pubkey = privkey.public_key() + writekey, _, fingerprint = derive_mutable_keys((pubkey, privkey)) + self.assertEqual( + (writekey, fingerprint), + (cap.writekey, cap.fingerprint), + ) + # Also the capability we were given actually refers to the data we + # uploaded. 
+ (rc, out, err) = await self.do_cli("get", out.strip()) + self.assertEqual(rc, 0, (out, err)) + self.assertEqual(out, datapath.getContent().decode("ascii")) + def test_mutable(self): # echo DATA1 | tahoe put --mutable - uploaded.txt # echo DATA2 | tahoe put - uploaded.txt # should modify-in-place diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index b652b2e48..db2921e86 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -1,14 +1,8 @@ """ -Ported to Python 3. +Functionality related to a lot of the test suite. """ -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals +from __future__ import annotations -from future.utils import PY2, native_str -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from past.builtins import chr as byteschr __all__ = [ @@ -111,25 +105,15 @@ from allmydata.scripts.common import ( from ..crypto import ( ed25519, + rsa, ) from .eliotutil import ( EliotLoggedRunTest, ) from .common_util import ShouldFailMixin # noqa: F401 -if sys.platform == "win32" and PY2: - # Python 2.7 doesn't have good options for launching a process with - # non-ASCII in its command line. So use this alternative that does a - # better job. However, only use it on Windows because it doesn't work - # anywhere else. - from ._win_subprocess import ( - Popen, - ) -else: - from subprocess import ( - Popen, - ) from subprocess import ( + Popen, PIPE, ) @@ -298,7 +282,7 @@ class UseNode(object): plugin_config = attr.ib() storage_plugin = attr.ib() basedir = attr.ib(validator=attr.validators.instance_of(FilePath)) - introducer_furl = attr.ib(validator=attr.validators.instance_of(native_str), + introducer_furl = attr.ib(validator=attr.validators.instance_of(str), converter=six.ensure_str) node_config = attr.ib(default=attr.Factory(dict)) @@ -639,15 +623,28 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation MUTABLE_SIZELIMIT = 10000 - def __init__(self, storage_broker, secret_holder, - default_encoding_parameters, history, all_contents): + _public_key: rsa.PublicKey | None + _private_key: rsa.PrivateKey | None + + def __init__(self, + storage_broker, + secret_holder, + default_encoding_parameters, + history, + all_contents, + keypair: tuple[rsa.PublicKey, rsa.PrivateKey] | None + ): self.all_contents = all_contents - self.file_types = {} # storage index => MDMF_VERSION or SDMF_VERSION - self.init_from_cap(make_mutable_file_cap()) + self.file_types: dict[bytes, int] = {} # storage index => MDMF_VERSION or SDMF_VERSION + self.init_from_cap(make_mutable_file_cap(keypair)) self._k = default_encoding_parameters['k'] self._segsize = default_encoding_parameters['max_segment_size'] - def create(self, contents, key_generator=None, keysize=None, - version=SDMF_VERSION): + if keypair is None: + self._public_key = self._private_key = None + else: + self._public_key, self._private_key = keypair + + def create(self, contents, version=SDMF_VERSION): if version == MDMF_VERSION and \ isinstance(self.my_uri, (uri.ReadonlySSKFileURI, uri.WriteableSSKFileURI)): @@ -843,9 +840,28 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation return defer.succeed(consumer) -def make_mutable_file_cap(): - return uri.WriteableSSKFileURI(writekey=os.urandom(16), - fingerprint=os.urandom(32)) +def 
make_mutable_file_cap( + keypair: tuple[rsa.PublicKey, rsa.PrivateKey] | None = None, +) -> uri.WriteableSSKFileURI: + """ + Create a local representation of a mutable object. + + :param keypair: If None, a random keypair will be generated for the new + object. Otherwise, this is the keypair for that object. + """ + if keypair is None: + writekey = os.urandom(16) + fingerprint = os.urandom(32) + else: + pubkey, privkey = keypair + pubkey_s = rsa.der_string_from_verifying_key(pubkey) + privkey_s = rsa.der_string_from_signing_key(privkey) + writekey = hashutil.ssk_writekey_hash(privkey_s) + fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey_s) + + return uri.WriteableSSKFileURI( + writekey=writekey, fingerprint=fingerprint, + ) def make_mdmf_mutable_file_cap(): return uri.WriteableMDMFFileURI(writekey=os.urandom(16), @@ -875,7 +891,7 @@ def create_mutable_filenode(contents, mdmf=False, all_contents=None): encoding_params['max_segment_size'] = 128*1024 filenode = FakeMutableFileNode(None, None, encoding_params, None, - all_contents) + all_contents, None) filenode.init_from_cap(cap) if mdmf: filenode.create(MutableData(contents), version=MDMF_VERSION) diff --git a/src/allmydata/test/data/openssl-rsa-2048.txt b/src/allmydata/test/data/openssl-rsa-2048.txt new file mode 100644 index 000000000..8f989f42c --- /dev/null +++ b/src/allmydata/test/data/openssl-rsa-2048.txt @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDF1MeXulDWFO05 +YXCh8aqNc1dS1ddJRzsti4BOWuDOepUc0oCaSIcC5aR7XJ+vhX7a02mTIwvLcuEH +8sxx0BJU4jCDpRI6aAqaKJxwZx1e6AcVFJDl7vzymhvWhqHuKh0jTvwM2zONWTwV +V8m2PbDdxu0Prwdx+Mt2sDT6xHEhJj5fI/GUDUEdkhLJF6DQSulFRqqd0qP7qcI9 +fSHZbM7MywfzqFUe8J1+tk4fBh2v7gNzN1INpzh2mDtLPAtxr4ZPtEb/0D0U4PsP +CniOHP0U8sF3VY0+K5qoCQr92cLRJvT/vLpQGVNUTFdFrtbqDoFxUCyEH4FUqRDX +2mVrPo2xAgMBAAECggEAA0Ev1y5/1NTPbgytBeIIH3d+v9hwKDbHecVoMwnOVeFJ +BZpONrOToovhAc1NXH2wj4SvwYWfpJ1HR9piDAuLeKlnuUu4ffzfE0gQok4E+v4r +2yg9ZcYBs/NOetAYVwbq960tiv/adFRr71E0WqbfS3fBx8q2L3Ujkkhd98PudUhQ +izbrTvkT7q00OPCWGwgWepMlLEowUWwZehGI0MlbONg7SbRraZZmG586Iy0tpC3e +AM7wC1/ORzFqcRgTIxXizQ5RHL7S0OQPLhbEJbuwPonNjze3p0EP4wNBELZTaVOd +xeA22Py4Bh/d1q3aEgbwR7tLyA8YfEzshTaY6oV8AQKBgQD0uFo8pyWk0AWXfjzn +jV4yYyPWy8pJA6YfAJAST8m7B/JeYgGlfHxTlNZiB40DsJq08tOZv3HAubgMpFIa +reuDxPqo6/Quwdy4Syu+AFhY48KIuwuoegG/L+5qcQLE69r1w71ZV6wUvLmXYX2I +Y6nYz+OdpD1JrMIr6Js60XURsQKBgQDO8yWl7ufIDKMbQpbs0PgUQsH4FtzGcP4J +j/7/8GfhKYt6rPsrojPHUbAi1+25xBVOuhm0Zx2ku2t+xPIMJoS+15EcER1Z2iHZ +Zci9UGpJpUxGcUhG7ETF1HZv0xKHcEOl9eIIOcAP9Vd9DqnGk85gy6ti6MHe/5Tn +IMD36OQ8AQKBgQDwqE7NMM67KnslRNaeG47T3F0FQbm3XehCuqnz6BUJYcI+gQD/ +fdFB3K+LDcPmKgmqAtaGbxdtoPXXMM0xQXHHTrH15rxmMu1dK0dj/TDkkW7gSZko +YHtRSdCbSnGfuBXG9GxD7QzkA8g7j3sE4oXIGoDLqRVAW61DwubMy+jlsQKBgGNB ++Zepi1/Gt+BWQt8YpzPIhRIBnShMf3uEphCJdLlo3K4dE2btKBp8UpeTq0CDDJky +5ytAndYp0jf+K/2p59dEuyOUDdjPp5aGnA446JGkB35tzPW/Uoj0C049FVEChl+u +HBhH4peE285uXv2QXNbOOMh6zKmxOfDVI9iDyhwBAoGBAIXq2Ar0zDXXaL3ncEKo +pXt9BZ8OpJo2pvB1t2VPePOwEQ0wdT+H62fKNY47NiF9+LyS541/ps5Qhv6AmiKJ +Z7I0Vb6+sxQljYH/LNW+wc2T/pIAi/7sNcmnlBtZfoVwt99bk2CyoRALPLWHYCkh +c7Tty2bZzDZy6aCX+FGRt5N/ +-----END PRIVATE KEY----- diff --git a/src/allmydata/test/mutable/test_filenode.py b/src/allmydata/test/mutable/test_filenode.py index 579734433..6c00e4420 100644 --- a/src/allmydata/test/mutable/test_filenode.py +++ b/src/allmydata/test/mutable/test_filenode.py @@ -30,6 +30,7 @@ from allmydata.mutable.publish import MutableData from ..test_download import PausingConsumer, PausingAndStoppingConsumer, \ StoppingConsumer, 
ImmediatelyStoppingConsumer from .. import common_util as testutil +from ...crypto.rsa import create_signing_keypair from .util import ( FakeStorage, make_nodemaker_with_peers, @@ -65,6 +66,16 @@ class Filenode(AsyncBrokenTestCase, testutil.ShouldFailMixin): d.addCallback(_created) return d + async def test_create_with_keypair(self): + """ + An SDMF can be created using a given keypair. + """ + (priv, pub) = create_signing_keypair(2048) + node = await self.nodemaker.create_mutable_file(keypair=(pub, priv)) + self.assertThat( + (node.get_privkey(), node.get_pubkey()), + Equals((priv, pub)), + ) def test_create_mdmf(self): d = self.nodemaker.create_mutable_file(version=MDMF_VERSION) diff --git a/src/allmydata/test/mutable/test_version.py b/src/allmydata/test/mutable/test_version.py index d5c44f204..87050424b 100644 --- a/src/allmydata/test/mutable/test_version.py +++ b/src/allmydata/test/mutable/test_version.py @@ -1,19 +1,12 @@ """ -Ported to Python 3. +Tests related to the way ``allmydata.mutable`` handles different versions +of data for an object. """ -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from io import StringIO import os -from six.moves import cStringIO as StringIO +from typing import Optional -from twisted.internet import defer from ..common import AsyncTestCase from testtools.matchers import ( Equals, @@ -47,343 +40,268 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ self.small_data = b"test data" * 10 # 90 B; SDMF - def do_upload_mdmf(self, data=None): + async def do_upload_mdmf(self, data: Optional[bytes] = None) -> MutableFileNode: if data is None: data = self.data - d = self.nm.create_mutable_file(MutableData(data), - version=MDMF_VERSION) - def _then(n): - self.assertThat(n, IsInstance(MutableFileNode)) - self.assertThat(n._protocol_version, Equals(MDMF_VERSION)) - self.mdmf_node = n - return n - d.addCallback(_then) - return d + n = await self.nm.create_mutable_file(MutableData(data), + version=MDMF_VERSION) + self.assertThat(n, IsInstance(MutableFileNode)) + self.assertThat(n._protocol_version, Equals(MDMF_VERSION)) + self.mdmf_node = n + return n - def do_upload_sdmf(self, data=None): + async def do_upload_sdmf(self, data: Optional[bytes] = None) -> MutableFileNode: if data is None: data = self.small_data - d = self.nm.create_mutable_file(MutableData(data)) - def _then(n): - self.assertThat(n, IsInstance(MutableFileNode)) - self.assertThat(n._protocol_version, Equals(SDMF_VERSION)) - self.sdmf_node = n - return n - d.addCallback(_then) - return d + n = await self.nm.create_mutable_file(MutableData(data)) + self.assertThat(n, IsInstance(MutableFileNode)) + self.assertThat(n._protocol_version, Equals(SDMF_VERSION)) + self.sdmf_node = n + return n - def do_upload_empty_sdmf(self): - d = self.nm.create_mutable_file(MutableData(b"")) - def _then(n): - self.assertThat(n, IsInstance(MutableFileNode)) - self.sdmf_zero_length_node = n - self.assertThat(n._protocol_version, Equals(SDMF_VERSION)) - return n - d.addCallback(_then) - return d + async def do_upload_empty_sdmf(self) -> MutableFileNode: + n = await self.nm.create_mutable_file(MutableData(b"")) + self.assertThat(n, IsInstance(MutableFileNode)) + self.sdmf_zero_length_node = n + 
self.assertThat(n._protocol_version, Equals(SDMF_VERSION)) + return n - def do_upload(self): - d = self.do_upload_mdmf() - d.addCallback(lambda ign: self.do_upload_sdmf()) - return d + async def do_upload(self) -> MutableFileNode: + await self.do_upload_mdmf() + return await self.do_upload_sdmf() - def test_debug(self): - d = self.do_upload_mdmf() - def _debug(n): - fso = debug.FindSharesOptions() - storage_index = base32.b2a(n.get_storage_index()) - fso.si_s = str(storage_index, "utf-8") # command-line options are unicode on Python 3 - fso.nodedirs = [os.path.dirname(abspath_expanduser_unicode(str(storedir))) - for (i,ss,storedir) - in self.iterate_servers()] - fso.stdout = StringIO() - fso.stderr = StringIO() - debug.find_shares(fso) - sharefiles = fso.stdout.getvalue().splitlines() - expected = self.nm.default_encoding_parameters["n"] - self.assertThat(sharefiles, HasLength(expected)) + async def test_debug(self) -> None: + n = await self.do_upload_mdmf() + fso = debug.FindSharesOptions() + storage_index = base32.b2a(n.get_storage_index()) + fso.si_s = str(storage_index, "utf-8") # command-line options are unicode on Python 3 + fso.nodedirs = [os.path.dirname(abspath_expanduser_unicode(str(storedir))) + for (i,ss,storedir) + in self.iterate_servers()] + fso.stdout = StringIO() + fso.stderr = StringIO() + debug.find_shares(fso) + sharefiles = fso.stdout.getvalue().splitlines() + expected = self.nm.default_encoding_parameters["n"] + self.assertThat(sharefiles, HasLength(expected)) - do = debug.DumpOptions() - do["filename"] = sharefiles[0] - do.stdout = StringIO() - debug.dump_share(do) - output = do.stdout.getvalue() - lines = set(output.splitlines()) - self.assertTrue("Mutable slot found:" in lines, output) - self.assertTrue(" share_type: MDMF" in lines, output) - self.assertTrue(" num_extra_leases: 0" in lines, output) - self.assertTrue(" MDMF contents:" in lines, output) - self.assertTrue(" seqnum: 1" in lines, output) - self.assertTrue(" required_shares: 3" in lines, output) - self.assertTrue(" total_shares: 10" in lines, output) - self.assertTrue(" segsize: 131073" in lines, output) - self.assertTrue(" datalen: %d" % len(self.data) in lines, output) - vcap = str(n.get_verify_cap().to_string(), "utf-8") - self.assertTrue(" verify-cap: %s" % vcap in lines, output) - cso = debug.CatalogSharesOptions() - cso.nodedirs = fso.nodedirs - cso.stdout = StringIO() - cso.stderr = StringIO() - debug.catalog_shares(cso) - shares = cso.stdout.getvalue().splitlines() - oneshare = shares[0] # all shares should be MDMF - self.failIf(oneshare.startswith("UNKNOWN"), oneshare) - self.assertTrue(oneshare.startswith("MDMF"), oneshare) - fields = oneshare.split() - self.assertThat(fields[0], Equals("MDMF")) - self.assertThat(fields[1].encode("ascii"), Equals(storage_index)) - self.assertThat(fields[2], Equals("3/10")) - self.assertThat(fields[3], Equals("%d" % len(self.data))) - self.assertTrue(fields[4].startswith("#1:"), fields[3]) - # the rest of fields[4] is the roothash, which depends upon - # encryption salts and is not constant. fields[5] is the - # remaining time on the longest lease, which is timing dependent. - # The rest of the line is the quoted pathname to the share. 
- d.addCallback(_debug) - return d + do = debug.DumpOptions() + do["filename"] = sharefiles[0] + do.stdout = StringIO() + debug.dump_share(do) + output = do.stdout.getvalue() + lines = set(output.splitlines()) + self.assertTrue("Mutable slot found:" in lines, output) + self.assertTrue(" share_type: MDMF" in lines, output) + self.assertTrue(" num_extra_leases: 0" in lines, output) + self.assertTrue(" MDMF contents:" in lines, output) + self.assertTrue(" seqnum: 1" in lines, output) + self.assertTrue(" required_shares: 3" in lines, output) + self.assertTrue(" total_shares: 10" in lines, output) + self.assertTrue(" segsize: 131073" in lines, output) + self.assertTrue(" datalen: %d" % len(self.data) in lines, output) + vcap = str(n.get_verify_cap().to_string(), "utf-8") + self.assertTrue(" verify-cap: %s" % vcap in lines, output) + cso = debug.CatalogSharesOptions() + cso.nodedirs = fso.nodedirs + cso.stdout = StringIO() + cso.stderr = StringIO() + debug.catalog_shares(cso) + shares = cso.stdout.getvalue().splitlines() + oneshare = shares[0] # all shares should be MDMF + self.failIf(oneshare.startswith("UNKNOWN"), oneshare) + self.assertTrue(oneshare.startswith("MDMF"), oneshare) + fields = oneshare.split() + self.assertThat(fields[0], Equals("MDMF")) + self.assertThat(fields[1].encode("ascii"), Equals(storage_index)) + self.assertThat(fields[2], Equals("3/10")) + self.assertThat(fields[3], Equals("%d" % len(self.data))) + self.assertTrue(fields[4].startswith("#1:"), fields[3]) + # the rest of fields[4] is the roothash, which depends upon + # encryption salts and is not constant. fields[5] is the + # remaining time on the longest lease, which is timing dependent. + # The rest of the line is the quoted pathname to the share. + + async def test_get_sequence_number(self) -> None: + await self.do_upload() + bv = await self.mdmf_node.get_best_readable_version() + self.assertThat(bv.get_sequence_number(), Equals(1)) + bv = await self.sdmf_node.get_best_readable_version() + self.assertThat(bv.get_sequence_number(), Equals(1)) - def test_get_sequence_number(self): - d = self.do_upload() - d.addCallback(lambda ign: self.mdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: - self.assertThat(bv.get_sequence_number(), Equals(1))) - d.addCallback(lambda ignored: - self.sdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: - self.assertThat(bv.get_sequence_number(), Equals(1))) # Now update. The sequence number in both cases should be 1 in # both cases. 
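The context comment above is a little garbled: what the assertions actually establish is that the sequence number starts at 1 and that, after the overwrites performed next, it is 2 in both cases (each successful publish bumps it by one). In miniature, assuming a NodeMaker ``nm`` like the one in this test's fixtures:

```python
from allmydata.mutable.publish import MutableData


async def seqnum_demo(nm) -> None:
    # Initial publish: the best recoverable version has sequence number 1.
    node = await nm.create_mutable_file(MutableData(b"version one"))
    bv = await node.get_best_readable_version()
    assert bv.get_sequence_number() == 1

    # An overwrite publishes a new version with the next sequence number.
    await node.overwrite(MutableData(b"version two"))
    bv = await node.get_best_readable_version()
    assert bv.get_sequence_number() == 2
```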
- def _do_update(ignored): - new_data = MutableData(b"foo bar baz" * 100000) - new_small_data = MutableData(b"foo bar baz" * 10) - d1 = self.mdmf_node.overwrite(new_data) - d2 = self.sdmf_node.overwrite(new_small_data) - dl = gatherResults([d1, d2]) - return dl - d.addCallback(_do_update) - d.addCallback(lambda ignored: - self.mdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: - self.assertThat(bv.get_sequence_number(), Equals(2))) - d.addCallback(lambda ignored: - self.sdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: - self.assertThat(bv.get_sequence_number(), Equals(2))) - return d + new_data = MutableData(b"foo bar baz" * 100000) + new_small_data = MutableData(b"foo bar baz" * 10) + d1 = self.mdmf_node.overwrite(new_data) + d2 = self.sdmf_node.overwrite(new_small_data) + await gatherResults([d1, d2]) + bv = await self.mdmf_node.get_best_readable_version() + self.assertThat(bv.get_sequence_number(), Equals(2)) + bv = await self.sdmf_node.get_best_readable_version() + self.assertThat(bv.get_sequence_number(), Equals(2)) - - def test_cap_after_upload(self): + async def test_cap_after_upload(self) -> None: # If we create a new mutable file and upload things to it, and # it's an MDMF file, we should get an MDMF cap back from that # file and should be able to use that. # That's essentially what MDMF node is, so just check that. - d = self.do_upload_mdmf() - def _then(ign): - mdmf_uri = self.mdmf_node.get_uri() - cap = uri.from_string(mdmf_uri) - self.assertTrue(isinstance(cap, uri.WriteableMDMFFileURI)) - readonly_mdmf_uri = self.mdmf_node.get_readonly_uri() - cap = uri.from_string(readonly_mdmf_uri) - self.assertTrue(isinstance(cap, uri.ReadonlyMDMFFileURI)) - d.addCallback(_then) - return d + await self.do_upload_mdmf() + mdmf_uri = self.mdmf_node.get_uri() + cap = uri.from_string(mdmf_uri) + self.assertTrue(isinstance(cap, uri.WriteableMDMFFileURI)) + readonly_mdmf_uri = self.mdmf_node.get_readonly_uri() + cap = uri.from_string(readonly_mdmf_uri) + self.assertTrue(isinstance(cap, uri.ReadonlyMDMFFileURI)) - def test_mutable_version(self): + async def test_mutable_version(self) -> None: # assert that getting parameters from the IMutableVersion object # gives us the same data as getting them from the filenode itself - d = self.do_upload() - d.addCallback(lambda ign: self.mdmf_node.get_best_mutable_version()) - def _check_mdmf(bv): - n = self.mdmf_node - self.assertThat(bv.get_writekey(), Equals(n.get_writekey())) - self.assertThat(bv.get_storage_index(), Equals(n.get_storage_index())) - self.assertFalse(bv.is_readonly()) - d.addCallback(_check_mdmf) - d.addCallback(lambda ign: self.sdmf_node.get_best_mutable_version()) - def _check_sdmf(bv): - n = self.sdmf_node - self.assertThat(bv.get_writekey(), Equals(n.get_writekey())) - self.assertThat(bv.get_storage_index(), Equals(n.get_storage_index())) - self.assertFalse(bv.is_readonly()) - d.addCallback(_check_sdmf) - return d + await self.do_upload() + bv = await self.mdmf_node.get_best_mutable_version() + n = self.mdmf_node + self.assertThat(bv.get_writekey(), Equals(n.get_writekey())) + self.assertThat(bv.get_storage_index(), Equals(n.get_storage_index())) + self.assertFalse(bv.is_readonly()) + + bv = await self.sdmf_node.get_best_mutable_version() + n = self.sdmf_node + self.assertThat(bv.get_writekey(), Equals(n.get_writekey())) + self.assertThat(bv.get_storage_index(), Equals(n.get_storage_index())) + self.assertFalse(bv.is_readonly()) - def test_get_readonly_version(self): - d = self.do_upload() - 
d.addCallback(lambda ign: self.mdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: self.assertTrue(bv.is_readonly())) + async def test_get_readonly_version(self) -> None: + await self.do_upload() + bv = await self.mdmf_node.get_best_readable_version() + self.assertTrue(bv.is_readonly()) # Attempting to get a mutable version of a mutable file from a # filenode initialized with a readcap should return a readonly # version of that same node. - d.addCallback(lambda ign: self.mdmf_node.get_readonly()) - d.addCallback(lambda ro: ro.get_best_mutable_version()) - d.addCallback(lambda v: self.assertTrue(v.is_readonly())) + ro = self.mdmf_node.get_readonly() + v = await ro.get_best_mutable_version() + self.assertTrue(v.is_readonly()) - d.addCallback(lambda ign: self.sdmf_node.get_best_readable_version()) - d.addCallback(lambda bv: self.assertTrue(bv.is_readonly())) + bv = await self.sdmf_node.get_best_readable_version() + self.assertTrue(bv.is_readonly()) - d.addCallback(lambda ign: self.sdmf_node.get_readonly()) - d.addCallback(lambda ro: ro.get_best_mutable_version()) - d.addCallback(lambda v: self.assertTrue(v.is_readonly())) - return d + ro = self.sdmf_node.get_readonly() + v = await ro.get_best_mutable_version() + self.assertTrue(v.is_readonly()) - def test_toplevel_overwrite(self): + async def test_toplevel_overwrite(self) -> None: new_data = MutableData(b"foo bar baz" * 100000) new_small_data = MutableData(b"foo bar baz" * 10) - d = self.do_upload() - d.addCallback(lambda ign: self.mdmf_node.overwrite(new_data)) - d.addCallback(lambda ignored: - self.mdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Equals(b"foo bar baz" * 100000))) - d.addCallback(lambda ignored: - self.sdmf_node.overwrite(new_small_data)) - d.addCallback(lambda ignored: - self.sdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Equals(b"foo bar baz" * 10))) - return d + await self.do_upload() + await self.mdmf_node.overwrite(new_data) + data = await self.mdmf_node.download_best_version() + self.assertThat(data, Equals(b"foo bar baz" * 100000)) + await self.sdmf_node.overwrite(new_small_data) + data = await self.sdmf_node.download_best_version() + self.assertThat(data, Equals(b"foo bar baz" * 10)) - def test_toplevel_modify(self): - d = self.do_upload() + async def test_toplevel_modify(self) -> None: + await self.do_upload() def modifier(old_contents, servermap, first_time): return old_contents + b"modified" - d.addCallback(lambda ign: self.mdmf_node.modify(modifier)) - d.addCallback(lambda ignored: - self.mdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Contains(b"modified"))) - d.addCallback(lambda ignored: - self.sdmf_node.modify(modifier)) - d.addCallback(lambda ignored: - self.sdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Contains(b"modified"))) - return d + await self.mdmf_node.modify(modifier) + data = await self.mdmf_node.download_best_version() + self.assertThat(data, Contains(b"modified")) + await self.sdmf_node.modify(modifier) + data = await self.sdmf_node.download_best_version() + self.assertThat(data, Contains(b"modified")) - def test_version_modify(self): + async def test_version_modify(self) -> None: # TODO: When we can publish multiple versions, alter this test # to modify a version other than the best usable version, then # test to see that the best recoverable version is that. 
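Most of the changes to this file are a single mechanical transformation: Deferred callback chains rewritten as ``async def`` test methods, as in `test_version_modify` here. The pattern in miniature, on a made-up test case (illustrative names only; modern versions of trial run a coroutine test method by wrapping it in a Deferred):

```python
from twisted.internet import defer
from twisted.trial import unittest


class Demo(unittest.TestCase):
    def fetch(self):
        # Stand-in for an asynchronous operation that returns a Deferred.
        return defer.succeed(b"some data")

    # Before: an explicit callback chain, returning the Deferred to trial.
    def test_old_style(self):
        d = self.fetch()
        d.addCallback(lambda data: self.assertEqual(data, b"some data"))
        return d

    # After: the same steps read top to bottom; Deferreds are awaitable.
    async def test_new_style(self) -> None:
        data = await self.fetch()
        self.assertEqual(data, b"some data")
```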
- d = self.do_upload() + await self.do_upload() def modifier(old_contents, servermap, first_time): return old_contents + b"modified" - d.addCallback(lambda ign: self.mdmf_node.modify(modifier)) - d.addCallback(lambda ignored: - self.mdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Contains(b"modified"))) - d.addCallback(lambda ignored: - self.sdmf_node.modify(modifier)) - d.addCallback(lambda ignored: - self.sdmf_node.download_best_version()) - d.addCallback(lambda data: - self.assertThat(data, Contains(b"modified"))) - return d + await self.mdmf_node.modify(modifier) + data = await self.mdmf_node.download_best_version() + self.assertThat(data, Contains(b"modified")) + await self.sdmf_node.modify(modifier) + data = await self.sdmf_node.download_best_version() + self.assertThat(data, Contains(b"modified")) - def test_download_version(self): - d = self.publish_multiple() + async def test_download_version(self) -> None: + await self.publish_multiple() # We want to have two recoverable versions on the grid. - d.addCallback(lambda res: - self._set_versions({0:0,2:0,4:0,6:0,8:0, - 1:1,3:1,5:1,7:1,9:1})) + self._set_versions({0:0,2:0,4:0,6:0,8:0, + 1:1,3:1,5:1,7:1,9:1}) # Now try to download each version. We should get the plaintext # associated with that version. - d.addCallback(lambda ignored: - self._fn.get_servermap(mode=MODE_READ)) - def _got_servermap(smap): - versions = smap.recoverable_versions() - assert len(versions) == 2 + smap = await self._fn.get_servermap(mode=MODE_READ) + versions = smap.recoverable_versions() + assert len(versions) == 2 - self.servermap = smap - self.version1, self.version2 = versions - assert self.version1 != self.version2 + self.servermap = smap + self.version1, self.version2 = versions + assert self.version1 != self.version2 - self.version1_seqnum = self.version1[0] - self.version2_seqnum = self.version2[0] - self.version1_index = self.version1_seqnum - 1 - self.version2_index = self.version2_seqnum - 1 + self.version1_seqnum = self.version1[0] + self.version2_seqnum = self.version2[0] + self.version1_index = self.version1_seqnum - 1 + self.version2_index = self.version2_seqnum - 1 - d.addCallback(_got_servermap) - d.addCallback(lambda ignored: - self._fn.download_version(self.servermap, self.version1)) - d.addCallback(lambda results: - self.assertThat(self.CONTENTS[self.version1_index], - Equals(results))) - d.addCallback(lambda ignored: - self._fn.download_version(self.servermap, self.version2)) - d.addCallback(lambda results: - self.assertThat(self.CONTENTS[self.version2_index], - Equals(results))) - return d + results = await self._fn.download_version(self.servermap, self.version1) + self.assertThat(self.CONTENTS[self.version1_index], + Equals(results)) + results = await self._fn.download_version(self.servermap, self.version2) + self.assertThat(self.CONTENTS[self.version2_index], + Equals(results)) - def test_download_nonexistent_version(self): - d = self.do_upload_mdmf() - d.addCallback(lambda ign: self.mdmf_node.get_servermap(mode=MODE_WRITE)) - def _set_servermap(servermap): - self.servermap = servermap - d.addCallback(_set_servermap) - d.addCallback(lambda ignored: - self.shouldFail(UnrecoverableFileError, "nonexistent version", - None, - self.mdmf_node.download_version, self.servermap, - "not a version")) - return d + async def test_download_nonexistent_version(self) -> None: + await self.do_upload_mdmf() + servermap = await self.mdmf_node.get_servermap(mode=MODE_WRITE) + await 
self.shouldFail(UnrecoverableFileError, "nonexistent version", + None, + self.mdmf_node.download_version, servermap, + "not a version") - def _test_partial_read(self, node, expected, modes, step): - d = node.get_best_readable_version() + async def _test_partial_read(self, node, expected, modes, step) -> None: + version = await node.get_best_readable_version() for (name, offset, length) in modes: - d.addCallback(self._do_partial_read, name, expected, offset, length) + await self._do_partial_read(version, name, expected, offset, length) # then read the whole thing, but only a few bytes at a time, and see # that the results are what we expect. - def _read_data(version): - c = consumer.MemoryConsumer() - d2 = defer.succeed(None) - for i in range(0, len(expected), step): - d2.addCallback(lambda ignored, i=i: version.read(c, i, step)) - d2.addCallback(lambda ignored: - self.assertThat(expected, Equals(b"".join(c.chunks)))) - return d2 - d.addCallback(_read_data) - return d - - def _do_partial_read(self, version, name, expected, offset, length): c = consumer.MemoryConsumer() - d = version.read(c, offset, length) + for i in range(0, len(expected), step): + await version.read(c, i, step) + self.assertThat(expected, Equals(b"".join(c.chunks))) + + async def _do_partial_read(self, version, name, expected, offset, length) -> None: + c = consumer.MemoryConsumer() + await version.read(c, offset, length) if length is None: expected_range = expected[offset:] else: expected_range = expected[offset:offset+length] - d.addCallback(lambda ignored: b"".join(c.chunks)) - def _check(results): - if results != expected_range: - print("read([%d]+%s) got %d bytes, not %d" % \ - (offset, length, len(results), len(expected_range))) - print("got: %s ... %s" % (results[:20], results[-20:])) - print("exp: %s ... %s" % (expected_range[:20], expected_range[-20:])) - self.fail("results[%s] != expected_range" % name) - return version # daisy-chained to next call - d.addCallback(_check) - return d + results = b"".join(c.chunks) + if results != expected_range: + print("read([%d]+%s) got %d bytes, not %d" % \ + (offset, length, len(results), len(expected_range))) + print("got: %r ... %r" % (results[:20], results[-20:])) + print("exp: %r ... 
%r" % (expected_range[:20], expected_range[-20:])) + self.fail("results[%s] != expected_range" % name) - def test_partial_read_mdmf_0(self): + async def test_partial_read_mdmf_0(self) -> None: data = b"" - d = self.do_upload_mdmf(data=data) + result = await self.do_upload_mdmf(data=data) modes = [("all1", 0,0), ("all2", 0,None), ] - d.addCallback(self._test_partial_read, data, modes, 1) - return d + await self._test_partial_read(result, data, modes, 1) - def test_partial_read_mdmf_large(self): + async def test_partial_read_mdmf_large(self) -> None: segment_boundary = mathutil.next_multiple(128 * 1024, 3) modes = [("start_on_segment_boundary", segment_boundary, 50), ("ending_one_byte_after_segment_boundary", segment_boundary-50, 51), @@ -393,20 +311,18 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ ("complete_file1", 0, len(self.data)), ("complete_file2", 0, None), ] - d = self.do_upload_mdmf() - d.addCallback(self._test_partial_read, self.data, modes, 10000) - return d + result = await self.do_upload_mdmf() + await self._test_partial_read(result, self.data, modes, 10000) - def test_partial_read_sdmf_0(self): + async def test_partial_read_sdmf_0(self) -> None: data = b"" modes = [("all1", 0,0), ("all2", 0,None), ] - d = self.do_upload_sdmf(data=data) - d.addCallback(self._test_partial_read, data, modes, 1) - return d + result = await self.do_upload_sdmf(data=data) + await self._test_partial_read(result, data, modes, 1) - def test_partial_read_sdmf_2(self): + async def test_partial_read_sdmf_2(self) -> None: data = b"hi" modes = [("one_byte", 0, 1), ("last_byte", 1, 1), @@ -414,11 +330,10 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ ("complete_file", 0, 2), ("complete_file2", 0, None), ] - d = self.do_upload_sdmf(data=data) - d.addCallback(self._test_partial_read, data, modes, 1) - return d + result = await self.do_upload_sdmf(data=data) + await self._test_partial_read(result, data, modes, 1) - def test_partial_read_sdmf_90(self): + async def test_partial_read_sdmf_90(self) -> None: modes = [("start_at_middle", 50, 40), ("start_at_middle2", 50, None), ("zero_length_at_start", 0, 0), @@ -427,11 +342,10 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ ("complete_file1", 0, None), ("complete_file2", 0, 90), ] - d = self.do_upload_sdmf() - d.addCallback(self._test_partial_read, self.small_data, modes, 10) - return d + result = await self.do_upload_sdmf() + await self._test_partial_read(result, self.small_data, modes, 10) - def test_partial_read_sdmf_100(self): + async def test_partial_read_sdmf_100(self) -> None: data = b"test data "*10 modes = [("start_at_middle", 50, 50), ("start_at_middle2", 50, None), @@ -440,42 +354,30 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ ("complete_file1", 0, 100), ("complete_file2", 0, None), ] - d = self.do_upload_sdmf(data=data) - d.addCallback(self._test_partial_read, data, modes, 10) - return d + result = await self.do_upload_sdmf(data=data) + await self._test_partial_read(result, data, modes, 10) + async def _test_read_and_download(self, node, expected) -> None: + version = await node.get_best_readable_version() + c = consumer.MemoryConsumer() + await version.read(c) + self.assertThat(expected, Equals(b"".join(c.chunks))) - def _test_read_and_download(self, node, expected): - d = node.get_best_readable_version() - def _read_data(version): - c = consumer.MemoryConsumer() - c2 = consumer.MemoryConsumer() - d2 = defer.succeed(None) - 
d2.addCallback(lambda ignored: version.read(c)) - d2.addCallback(lambda ignored: - self.assertThat(expected, Equals(b"".join(c.chunks)))) + c2 = consumer.MemoryConsumer() + await version.read(c2, offset=0, size=len(expected)) + self.assertThat(expected, Equals(b"".join(c2.chunks))) - d2.addCallback(lambda ignored: version.read(c2, offset=0, - size=len(expected))) - d2.addCallback(lambda ignored: - self.assertThat(expected, Equals(b"".join(c2.chunks)))) - return d2 - d.addCallback(_read_data) - d.addCallback(lambda ignored: node.download_best_version()) - d.addCallback(lambda data: self.assertThat(expected, Equals(data))) - return d + data = await node.download_best_version() + self.assertThat(expected, Equals(data)) - def test_read_and_download_mdmf(self): - d = self.do_upload_mdmf() - d.addCallback(self._test_read_and_download, self.data) - return d + async def test_read_and_download_mdmf(self) -> None: + result = await self.do_upload_mdmf() + await self._test_read_and_download(result, self.data) - def test_read_and_download_sdmf(self): - d = self.do_upload_sdmf() - d.addCallback(self._test_read_and_download, self.small_data) - return d + async def test_read_and_download_sdmf(self) -> None: + result = await self.do_upload_sdmf() + await self._test_read_and_download(result, self.small_data) - def test_read_and_download_sdmf_zero_length(self): - d = self.do_upload_empty_sdmf() - d.addCallback(self._test_read_and_download, b"") - return d + async def test_read_and_download_sdmf_zero_length(self) -> None: + result = await self.do_upload_empty_sdmf() + await self._test_read_and_download(result, b"") diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index 67d331430..2319e3ce1 100644 --- a/src/allmydata/test/test_dirnode.py +++ b/src/allmydata/test/test_dirnode.py @@ -1619,7 +1619,8 @@ class FakeMutableFile(object): # type: ignore # incomplete implementation return defer.succeed(None) class FakeNodeMaker(NodeMaker): - def create_mutable_file(self, contents=b"", keysize=None, version=None): + def create_mutable_file(self, contents=b"", keysize=None, version=None, keypair=None): + assert keypair is None, "FakeNodeMaker does not support externally supplied keypairs" return defer.succeed(FakeMutableFile(contents)) class FakeClient2(_Client): # type: ignore # tahoe-lafs/ticket/3573 diff --git a/src/allmydata/test/test_pipeline.py b/src/allmydata/test/test_pipeline.py deleted file mode 100644 index 31d952836..000000000 --- a/src/allmydata/test/test_pipeline.py +++ /dev/null @@ -1,198 +0,0 @@ -""" -Tests for allmydata.util.pipeline. - -Ported to Python 3. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - -import gc - -from twisted.internet import defer -from twisted.trial import unittest -from twisted.python import log -from twisted.python.failure import Failure - -from allmydata.util import pipeline - - -class Pipeline(unittest.TestCase): - def pause(self, *args, **kwargs): - d = defer.Deferred() - self.calls.append( (d, args, kwargs) ) - return d - - def failUnlessCallsAre(self, expected): - #print(self.calls) - #print(expected) - self.failUnlessEqual(len(self.calls), len(expected), self.calls) - for i,c in enumerate(self.calls): - self.failUnlessEqual(c[1:], expected[i], str(i)) - - def test_basic(self): - self.calls = [] - finished = [] - p = pipeline.Pipeline(100) - - d = p.flush() # fires immediately - d.addCallbacks(finished.append, log.err) - self.failUnlessEqual(len(finished), 1) - finished = [] - - d = p.add(10, self.pause, "one") - # the call should start right away, and our return Deferred should - # fire right away - d.addCallbacks(finished.append, log.err) - self.failUnlessEqual(len(finished), 1) - self.failUnlessEqual(finished[0], None) - self.failUnlessCallsAre([ ( ("one",) , {} ) ]) - self.failUnlessEqual(p.gauge, 10) - - # pipeline: [one] - - finished = [] - d = p.add(20, self.pause, "two", kw=2) - # pipeline: [one, two] - - # the call and the Deferred should fire right away - d.addCallbacks(finished.append, log.err) - self.failUnlessEqual(len(finished), 1) - self.failUnlessEqual(finished[0], None) - self.failUnlessCallsAre([ ( ("one",) , {} ), - ( ("two",) , {"kw": 2} ), - ]) - self.failUnlessEqual(p.gauge, 30) - - self.calls[0][0].callback("one-result") - # pipeline: [two] - self.failUnlessEqual(p.gauge, 20) - - finished = [] - d = p.add(90, self.pause, "three", "posarg1") - # pipeline: [two, three] - flushed = [] - fd = p.flush() - fd.addCallbacks(flushed.append, log.err) - self.failUnlessEqual(flushed, []) - - # the call will be made right away, but the return Deferred will not, - # because the pipeline is now full. 
- d.addCallbacks(finished.append, log.err) - self.failUnlessEqual(len(finished), 0) - self.failUnlessCallsAre([ ( ("one",) , {} ), - ( ("two",) , {"kw": 2} ), - ( ("three", "posarg1"), {} ), - ]) - self.failUnlessEqual(p.gauge, 110) - - self.failUnlessRaises(pipeline.SingleFileError, p.add, 10, self.pause) - - # retiring either call will unblock the pipeline, causing the #3 - # Deferred to fire - self.calls[2][0].callback("three-result") - # pipeline: [two] - - self.failUnlessEqual(len(finished), 1) - self.failUnlessEqual(finished[0], None) - self.failUnlessEqual(flushed, []) - - # retiring call#2 will finally allow the flush() Deferred to fire - self.calls[1][0].callback("two-result") - self.failUnlessEqual(len(flushed), 1) - - def test_errors(self): - self.calls = [] - p = pipeline.Pipeline(100) - - d1 = p.add(200, self.pause, "one") - d2 = p.flush() - - finished = [] - d1.addBoth(finished.append) - self.failUnlessEqual(finished, []) - - flushed = [] - d2.addBoth(flushed.append) - self.failUnlessEqual(flushed, []) - - self.calls[0][0].errback(ValueError("oops")) - - self.failUnlessEqual(len(finished), 1) - f = finished[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - self.failUnlessIn("PipelineError", str(f.value)) - self.failUnlessIn("ValueError", str(f.value)) - r = repr(f.value) - self.failUnless("ValueError" in r, r) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - self.failUnlessEqual(len(flushed), 1) - f = flushed[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - # now that the pipeline is in the failed state, any new calls will - # fail immediately - - d3 = p.add(20, self.pause, "two") - - finished = [] - d3.addBoth(finished.append) - self.failUnlessEqual(len(finished), 1) - f = finished[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - r = repr(f.value) - self.failUnless("ValueError" in r, r) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - d4 = p.flush() - flushed = [] - d4.addBoth(flushed.append) - self.failUnlessEqual(len(flushed), 1) - f = flushed[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - def test_errors2(self): - self.calls = [] - p = pipeline.Pipeline(100) - - d1 = p.add(10, self.pause, "one") - d2 = p.add(20, self.pause, "two") - d3 = p.add(30, self.pause, "three") - d4 = p.flush() - - # one call fails, then the second one succeeds: make sure - # ExpandableDeferredList tolerates the second one - - flushed = [] - d4.addBoth(flushed.append) - self.failUnlessEqual(flushed, []) - - self.calls[0][0].errback(ValueError("oops")) - self.failUnlessEqual(len(flushed), 1) - f = flushed[0] - self.failUnless(isinstance(f, Failure)) - self.failUnless(f.check(pipeline.PipelineError)) - f2 = f.value.error - self.failUnless(f2.check(ValueError)) - - self.calls[1][0].callback("two-result") - self.calls[2][0].errback(ValueError("three-error")) - - del d1,d2,d3,d4 - gc.collect() # for PyPy diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index 134609f81..9b9d2d8de 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -3,14 +3,9 @@ Tests for allmydata.storage. Ported to Python 3. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from future.utils import native_str, PY2, bytes_to_native_str, bchr -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations +from future.utils import native_str, bytes_to_native_str, bchr from six import ensure_str from io import ( @@ -59,7 +54,7 @@ from allmydata.storage.common import storage_index_to_dir, \ si_b2a, si_a2b from allmydata.storage.lease import LeaseInfo from allmydata.immutable.layout import WriteBucketProxy, WriteBucketProxy_v2, \ - ReadBucketProxy + ReadBucketProxy, _WriteBuffer from allmydata.mutable.layout import MDMFSlotWriteProxy, MDMFSlotReadProxy, \ LayoutInvalid, MDMFSIGNABLEHEADER, \ SIGNED_PREFIX, MDMFHEADER, \ @@ -3746,3 +3741,39 @@ class LeaseInfoTests(SyncTestCase): info.to_mutable_data(), HasLength(info.mutable_size()), ) + + +class WriteBufferTests(SyncTestCase): + """Tests for ``_WriteBuffer``.""" + + @given( + small_writes=strategies.lists( + strategies.binary(min_size=1, max_size=20), + min_size=10, max_size=20), + batch_size=strategies.integers(min_value=5, max_value=10) + ) + def test_write_buffer(self, small_writes: list[bytes], batch_size: int): + """ + ``_WriteBuffer`` coalesces small writes into bigger writes based on + the batch size. + """ + wb = _WriteBuffer(batch_size) + result = b"" + for data in small_writes: + should_flush = wb.queue_write(data) + if should_flush: + flushed_offset, flushed_data = wb.flush() + self.assertEqual(flushed_offset, len(result)) + # The flushed data is in batch sizes, or closest approximation + # given queued inputs: + self.assertTrue(batch_size <= len(flushed_data) < batch_size + len(data)) + result += flushed_data + + # Final flush: + remaining_length = wb.get_queued_bytes() + flushed_offset, flushed_data = wb.flush() + self.assertEqual(remaining_length, len(flushed_data)) + self.assertEqual(flushed_offset, len(result)) + result += flushed_data + + self.assertEqual(result, b"".join(small_writes)) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index 8dbe18545..55754b29b 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -37,6 +37,7 @@ from twisted.web import http from twisted.web.http_headers import Headers from werkzeug import routing from werkzeug.exceptions import NotFound as WNotFound +from testtools.matchers import Equals from .common import SyncTestCase from ..storage.http_common import get_content_type, CBOR_MIME_TYPE @@ -555,6 +556,20 @@ class GenericHTTPAPITests(SyncTestCase): super(GenericHTTPAPITests, self).setUp() self.http = self.useFixture(HttpTestFixture()) + def test_missing_authentication(self) -> None: + """ + If nothing is given in the ``Authorization`` header at all an + ``Unauthorized`` response is returned. 
+ """ + client = StubTreq(self.http.http_server.get_resource()) + response = self.http.result_of_with_flush( + client.request( + "GET", + "http://127.0.0.1/storage/v1/version", + ), + ) + self.assertThat(response.code, Equals(http.UNAUTHORIZED)) + def test_bad_authentication(self): """ If the wrong swissnum is used, an ``Unauthorized`` response code is @@ -1186,18 +1201,42 @@ class MutableHTTPAPIsTests(SyncTestCase): ) return storage_index, write_secret, lease_secret - def test_write_can_be_read(self): + def test_write_can_be_read_small_data(self): + """ + Small written data can be read using ``read_share_chunk``. + """ + self.write_can_be_read(b"abcdef") + + def test_write_can_be_read_large_data(self): + """ + Large written data (50MB) can be read using ``read_share_chunk``. + """ + self.write_can_be_read(b"abcdefghij" * 5 * 1024 * 1024) + + def write_can_be_read(self, data): """ Written data can be read using ``read_share_chunk``. """ - storage_index, _, _ = self.create_upload() - data0 = self.http.result_of_with_flush( - self.mut_client.read_share_chunk(storage_index, 0, 1, 7) + lease_secret = urandom(32) + storage_index = urandom(16) + self.http.result_of_with_flush( + self.mut_client.read_test_write_chunks( + storage_index, + urandom(32), + lease_secret, + lease_secret, + { + 0: TestWriteVectors( + write_vectors=[WriteVector(offset=0, data=data)] + ), + }, + [], + ) ) - data1 = self.http.result_of_with_flush( - self.mut_client.read_share_chunk(storage_index, 1, 0, 8) + read_data = self.http.result_of_with_flush( + self.mut_client.read_share_chunk(storage_index, 0, 0, len(data)) ) - self.assertEqual((data0, data1), (b"bcdef-0", b"abcdef-1")) + self.assertEqual(read_data, data) def test_read_before_write(self): """In combo read/test/write operation, reads happen before writes.""" @@ -1276,15 +1315,6 @@ class MutableHTTPAPIsTests(SyncTestCase): b"aXYZef-0", ) - def test_too_large_write(self): - """ - Writing too large of a chunk results in a REQUEST ENTITY TOO LARGE http - error. 
- """ - with self.assertRaises(ClientException) as e: - self.create_upload(b"0123456789" * 1024 * 1024) - self.assertEqual(e.exception.code, http.REQUEST_ENTITY_TOO_LARGE) - def test_list_shares(self): """``list_shares()`` returns the shares for a given storage index.""" storage_index, _, _ = self.create_upload() diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 670ac5868..10a64c1fe 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -34,7 +34,7 @@ from allmydata.util.encodingutil import quote_output, unicode_to_argv from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data from allmydata.interfaces import IDirectoryNode, IFileNode, \ - NoSuchChildError, NoSharesError + NoSuchChildError, NoSharesError, SDMF_VERSION, MDMF_VERSION from allmydata.monitor import Monitor from allmydata.mutable.common import NotWriteableError from allmydata.mutable import layout as mutable_layout @@ -477,9 +477,10 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): def _corrupt_mutable_share(self, filename, which): msf = MutableShareFile(filename) - datav = msf.readv([ (0, 1000000) ]) + # Read more than share length: + datav = msf.readv([ (0, 10_000_000) ]) final_share = datav[0] - assert len(final_share) < 1000000 # ought to be truncated + assert len(final_share) < 10_000_000 # ought to be truncated pieces = mutable_layout.unpack_share(final_share) (seqnum, root_hash, IV, k, N, segsize, datalen, verification_key, signature, share_hash_chain, block_hash_tree, @@ -519,12 +520,20 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): msf.writev( [(0, final_share)], None) - def test_mutable(self): + def test_mutable_sdmf(self): + """SDMF mutables can be uploaded, downloaded, and many other things.""" + return self._test_mutable(SDMF_VERSION) + + def test_mutable_mdmf(self): + """MDMF mutables can be uploaded, downloaded, and many other things.""" + return self._test_mutable(MDMF_VERSION) + + def _test_mutable(self, mutable_version): DATA = b"initial contents go here." 
# 25 bytes % 3 != 0 DATA_uploadable = MutableData(DATA) NEWDATA = b"new contents yay" NEWDATA_uploadable = MutableData(NEWDATA) - NEWERDATA = b"this is getting old" + NEWERDATA = b"this is getting old" * 1_000_000 NEWERDATA_uploadable = MutableData(NEWERDATA) d = self.set_up_nodes() @@ -532,7 +541,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): def _create_mutable(res): c = self.clients[0] log.msg("starting create_mutable_file") - d1 = c.create_mutable_file(DATA_uploadable) + d1 = c.create_mutable_file(DATA_uploadable, mutable_version) def _done(res): log.msg("DONE: %s" % (res,)) self._mutable_node_1 = res @@ -554,27 +563,33 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): filename) self.failUnlessEqual(rc, 0) try: + share_type = 'SDMF' if mutable_version == SDMF_VERSION else 'MDMF' self.failUnless("Mutable slot found:\n" in output) - self.failUnless("share_type: SDMF\n" in output) + self.assertIn(f"share_type: {share_type}\n", output) peerid = idlib.nodeid_b2a(self.clients[client_num].nodeid) self.failUnless(" WE for nodeid: %s\n" % peerid in output) self.failUnless(" num_extra_leases: 0\n" in output) self.failUnless(" secrets are for nodeid: %s\n" % peerid in output) - self.failUnless(" SDMF contents:\n" in output) + self.failUnless(f" {share_type} contents:\n" in output) self.failUnless(" seqnum: 1\n" in output) self.failUnless(" required_shares: 3\n" in output) self.failUnless(" total_shares: 10\n" in output) - self.failUnless(" segsize: 27\n" in output, (output, filename)) + if mutable_version == SDMF_VERSION: + self.failUnless(" segsize: 27\n" in output, (output, filename)) self.failUnless(" datalen: 25\n" in output) # the exact share_hash_chain nodes depends upon the sharenum, # and is more of a hassle to compute than I want to deal with # now self.failUnless(" share_hash_chain: " in output) self.failUnless(" block_hash_tree: 1 nodes\n" in output) - expected = (" verify-cap: URI:SSK-Verifier:%s:" % - str(base32.b2a(storage_index), "ascii")) - self.failUnless(expected in output) + if mutable_version == SDMF_VERSION: + expected = (" verify-cap: URI:SSK-Verifier:%s:" % + str(base32.b2a(storage_index), "ascii")) + else: + expected = (" verify-cap: URI:MDMF-Verifier:%s" % + str(base32.b2a(storage_index), "ascii")) + self.assertIn(expected, output) except unittest.FailTest: print() print("dump-share output was:") @@ -694,7 +709,10 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): # when we retrieve this, we should get three signature # failures (where we've mangled seqnum, R, and segsize). 
The
        # pubkey mangling
-        d.addCallback(_corrupt_shares)
+
+        if mutable_version == SDMF_VERSION:
+            # TODO: Corrupting shares in test_system doesn't work for MDMF right now.
+            d.addCallback(_corrupt_shares)
         d.addCallback(lambda res: self._newnode3.download_best_version())
         d.addCallback(_check_download_5)
@@ -702,7 +720,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
         def _check_empty_file(res):
             # make sure we can create empty files, this usually screws up the
             # segsize math
-            d1 = self.clients[2].create_mutable_file(MutableData(b""))
+            d1 = self.clients[2].create_mutable_file(MutableData(b""), mutable_version)
             d1.addCallback(lambda newnode: newnode.download_best_version())
             d1.addCallback(lambda res: self.failUnlessEqual(b"", res))
             return d1
diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py
index 03cd6e560..4c828817a 100644
--- a/src/allmydata/test/web/test_web.py
+++ b/src/allmydata/test/web/test_web.py
@@ -1,19 +1,14 @@
 """
-Ported to Python 3.
+Tests for a bunch of web-related APIs.
 """
-from __future__ import print_function
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import unicode_literals
+from __future__ import annotations

-from future.utils import PY2
-if PY2:
-    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
 from six import ensure_binary

 import os.path, re, time
 import treq
 from urllib.parse import quote as urlquote, unquote as urlunquote
+from base64 import urlsafe_b64encode

 from bs4 import BeautifulSoup

@@ -38,6 +33,7 @@ from allmydata.util import fileutil, base32, hashutil, jsonbytes as json
 from allmydata.util.consumer import download_to_data
 from allmydata.util.encodingutil import to_bytes
 from ...util.connection_status import ConnectionStatus
+from ...crypto.rsa import PublicKey, PrivateKey, create_signing_keypair, der_string_from_signing_key
 from ..common import (
     EMPTY_CLIENT_CONFIG,
     FakeCHKFileNode,
@@ -65,6 +61,7 @@ from allmydata.interfaces import (
     MustBeReadonlyError,
 )
 from allmydata.mutable import servermap, publish, retrieve
+from allmydata.mutable.common import derive_mutable_keys
 from ..
import common_util as testutil from ..common_util import TimezoneMixin from ..common_web import ( @@ -93,6 +90,7 @@ class FakeNodeMaker(NodeMaker): 'happy': 7, 'max_segment_size':128*1024 # 1024=KiB } + all_contents: dict[bytes, object] def _create_lit(self, cap): return FakeCHKFileNode(cap, self.all_contents) def _create_immutable(self, cap): @@ -100,11 +98,19 @@ class FakeNodeMaker(NodeMaker): def _create_mutable(self, cap): return FakeMutableFileNode(None, None, self.encoding_params, None, - self.all_contents).init_from_cap(cap) - def create_mutable_file(self, contents=b"", keysize=None, - version=SDMF_VERSION): + self.all_contents, None).init_from_cap(cap) + def create_mutable_file(self, + contents=None, + version=None, + keypair: tuple[PublicKey, PrivateKey] | None=None, + ): + if contents is None: + contents = b"" + if version is None: + version = SDMF_VERSION + n = FakeMutableFileNode(None, None, self.encoding_params, None, - self.all_contents) + self.all_contents, keypair) return n.create(contents, version=version) class FakeUploader(service.Service): @@ -2868,6 +2874,41 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi "Unknown format: foo", method="post", data=body, headers=headers) + async def test_POST_upload_keypair(self) -> None: + """ + A *POST* creating a new mutable object may include a *private-key* + query argument giving a urlsafe-base64-encoded RSA private key to use + as the "signature key". The given signature key is used, rather than + a new one being generated. + """ + format = "sdmf" + priv, pub = create_signing_keypair(2048) + encoded_privkey = urlsafe_b64encode(der_string_from_signing_key(priv)).decode("ascii") + filename = "predetermined-sdmf" + expected_content = self.NEWFILE_CONTENTS * 100 + actual_cap = uri.from_string(await self.POST( + self.public_url + + f"/foo?t=upload&format={format}&private-key={encoded_privkey}", + file=(filename, expected_content), + )) + # Ideally we would inspect the private ("signature") and public + # ("verification") keys but they are not made easily accessible here + # (ostensibly because we have a FakeMutableFileNode instead of a real + # one). + # + # So, instead, re-compute the writekey and fingerprint and compare + # those against the capability string. + expected_writekey, _, expected_fingerprint = derive_mutable_keys((pub, priv)) + self.assertEqual( + (expected_writekey, expected_fingerprint), + (actual_cap.writekey, actual_cap.fingerprint), + ) + + # And the capability we got can be used to download the data we + # uploaded. + downloaded_content = await self.GET(f"/uri/{actual_cap.to_string().decode('ascii')}") + self.assertEqual(expected_content, downloaded_content) + def test_POST_upload_format(self): def _check_upload(ign, format, uri_prefix, fn=None): filename = format + ".txt" diff --git a/src/allmydata/test/web/test_webish.py b/src/allmydata/test/web/test_webish.py index 4a77d21ae..050f77d1c 100644 --- a/src/allmydata/test/web/test_webish.py +++ b/src/allmydata/test/web/test_webish.py @@ -202,6 +202,16 @@ class TahoeLAFSSiteTests(SyncTestCase): ), ) + def test_private_key_censoring(self): + """ + The log event for a request including a **private-key** query + argument has the private key value censored. 
+        """
+        self._test_censoring(
+            b"/uri?uri=URI:CHK:aaa:bbb&private-key=AAAAaaaabbbb==",
+            b"/uri?uri=[CENSORED]&private-key=[CENSORED]",
+        )
+
     def test_uri_censoring(self):
         """
         The log event for a request for **/uri/** has the capability value
diff --git a/src/allmydata/util/pipeline.py b/src/allmydata/util/pipeline.py
deleted file mode 100644
index 31f5d5d49..000000000
--- a/src/allmydata/util/pipeline.py
+++ /dev/null
@@ -1,149 +0,0 @@
-"""
-A pipeline of Deferreds.
-
-Ported to Python 3.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-from future.utils import PY2
-if PY2:
-    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
-
-from twisted.internet import defer
-from twisted.python.failure import Failure
-from twisted.python import log
-from allmydata.util.assertutil import precondition
-
-
-class PipelineError(Exception):
-    """One of the pipelined messages returned an error. The received Failure
-    object is stored in my .error attribute."""
-    def __init__(self, error):
-        self.error = error
-
-    def __repr__(self):
-        return "<PipelineError error=(%s)>" % (self.error,)
-    def __str__(self):
-        return "<PipelineError error=(%s)>" % (self.error,)
-
-class SingleFileError(Exception):
-    """You are not permitted to add a job to a full pipeline."""
-
-
-class ExpandableDeferredList(defer.Deferred, object):
-    # like DeferredList(fireOnOneErrback=True) with a built-in
-    # gatherResults(), but you can add new Deferreds until you close it. This
-    # gives you a chance to add don't-complain-about-unhandled-error errbacks
-    # immediately after attachment, regardless of whether you actually end up
-    # wanting the list or not.
-    def __init__(self):
-        defer.Deferred.__init__(self)
-        self.resultsReceived = 0
-        self.resultList = []
-        self.failure = None
-        self.closed = False
-
-    def addDeferred(self, d):
-        precondition(not self.closed, "don't call addDeferred() on a closed ExpandableDeferredList")
-        index = len(self.resultList)
-        self.resultList.append(None)
-        d.addCallbacks(self._cbDeferred, self._ebDeferred,
-                       callbackArgs=(index,))
-        return d
-
-    def close(self):
-        self.closed = True
-        self.checkForFinished()
-
-    def checkForFinished(self):
-        if not self.closed:
-            return
-        if self.called:
-            return
-        if self.failure:
-            self.errback(self.failure)
-        elif self.resultsReceived == len(self.resultList):
-            self.callback(self.resultList)
-
-    def _cbDeferred(self, res, index):
-        self.resultList[index] = res
-        self.resultsReceived += 1
-        self.checkForFinished()
-        return res
-
-    def _ebDeferred(self, f):
-        self.failure = f
-        self.checkForFinished()
-        return f
-
-
-class Pipeline(object):
-    """I manage a size-limited pipeline of Deferred operations, usually
-    callRemote() messages."""
-
-    def __init__(self, capacity):
-        self.capacity = capacity # how full we can be
-        self.gauge = 0 # how full we are
-        self.failure = None
-        self.waiting = [] # callers of add() who are blocked
-        self.unflushed = ExpandableDeferredList()
-
-    def add(self, _size, _func, *args, **kwargs):
-        # We promise that all the Deferreds we return will fire in the order
-        # they were returned. To make it easier to keep this promise, we
-        # prohibit multiple outstanding calls to add() .
- if self.waiting: - raise SingleFileError - if self.failure: - return defer.fail(self.failure) - self.gauge += _size - fd = defer.maybeDeferred(_func, *args, **kwargs) - fd.addBoth(self._call_finished, _size) - self.unflushed.addDeferred(fd) - fd.addErrback(self._eat_pipeline_errors) - fd.addErrback(log.err, "_eat_pipeline_errors didn't eat it") - if self.gauge < self.capacity: - return defer.succeed(None) - d = defer.Deferred() - self.waiting.append(d) - return d - - def flush(self): - if self.failure: - return defer.fail(self.failure) - d, self.unflushed = self.unflushed, ExpandableDeferredList() - d.close() - d.addErrback(self._flushed_error) - return d - - def _flushed_error(self, f): - precondition(self.failure) # should have been set by _call_finished - return self.failure - - def _call_finished(self, res, size): - self.gauge -= size - if isinstance(res, Failure): - res = Failure(PipelineError(res)) - if not self.failure: - self.failure = res - if self.failure: - while self.waiting: - d = self.waiting.pop(0) - d.errback(self.failure) - else: - while self.waiting and (self.gauge < self.capacity): - d = self.waiting.pop(0) - d.callback(None) - # the d.callback() might trigger a new call to add(), which - # will raise our gauge and might cause the pipeline to be - # filled. So the while() loop gets a chance to tell the - # caller to stop. - return res - - def _eat_pipeline_errors(self, f): - f.trap(PipelineError) - return None diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index bf89044a3..c49354217 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -1,26 +1,17 @@ """ Ported to Python 3. """ -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 - from past.builtins import unicode as str # prevent leaking newbytes/newstr into code that can't handle it +from __future__ import annotations from six import ensure_str -try: - from typing import Optional, Union, Tuple, Any -except ImportError: - pass +from typing import Optional, Union, TypeVar, overload +from typing_extensions import Literal import time import json from functools import wraps +from base64 import urlsafe_b64decode from hyperlink import ( DecodedURL, @@ -94,7 +85,7 @@ from allmydata.util.encodingutil import ( to_bytes, ) from allmydata.util import abbreviate - +from allmydata.crypto.rsa import PrivateKey, PublicKey, create_signing_keypair_from_string class WebError(Exception): def __init__(self, text, code=http.BAD_REQUEST): @@ -713,8 +704,15 @@ def url_for_string(req, url_string): ) return url +T = TypeVar("T") -def get_arg(req, argname, default=None, multiple=False): # type: (IRequest, Union[bytes,str], Any, bool) -> Union[bytes,Tuple[bytes],Any] +@overload +def get_arg(req: IRequest, argname: str | bytes, default: T = None, *, multiple: Literal[False] = False) -> T | bytes: ... + +@overload +def get_arg(req: IRequest, argname: str | bytes, default: T = None, *, multiple: Literal[True]) -> T | tuple[bytes, ...]: ... + +def get_arg(req: IRequest, argname: str | bytes, default: T = None, *, multiple: bool = False) -> None | T | bytes | tuple[bytes, ...]: """Extract an argument from either the query args (req.args) or the form body fields (req.fields). 
If multiple=False, this returns a single value (or the default, which defaults to None), and the query args take @@ -726,13 +724,14 @@ def get_arg(req, argname, default=None, multiple=False): # type: (IRequest, Uni :return: Either bytes or tuple of bytes. """ if isinstance(argname, str): - argname = argname.encode("utf-8") - if isinstance(default, str): - default = default.encode("utf-8") + argname_bytes = argname.encode("utf-8") + else: + argname_bytes = argname + results = [] - if argname in req.args: - results.extend(req.args[argname]) - argname_unicode = str(argname, "utf-8") + if argname_bytes in req.args: + results.extend(req.args[argname_bytes]) + argname_unicode = str(argname_bytes, "utf-8") if req.fields and argname_unicode in req.fields: value = req.fields[argname_unicode].value if isinstance(value, str): @@ -742,6 +741,9 @@ def get_arg(req, argname, default=None, multiple=False): # type: (IRequest, Uni return tuple(results) if results: return results[0] + + if isinstance(default, str): + return default.encode("utf-8") return default @@ -833,3 +835,14 @@ def abbreviate_time(data): if s >= 0.001: return u"%.1fms" % (1000*s) return u"%.0fus" % (1000000*s) + +def get_keypair(request: IRequest) -> tuple[PublicKey, PrivateKey] | None: + """ + Load a keypair from a urlsafe-base64-encoded RSA private key in the + **private-key** argument of the given request, if there is one. + """ + privkey_der = get_arg(request, "private-key", default=None, multiple=False) + if privkey_der is None: + return None + privkey, pubkey = create_signing_keypair_from_string(urlsafe_b64decode(privkey_der)) + return pubkey, privkey diff --git a/src/allmydata/web/filenode.py b/src/allmydata/web/filenode.py index dd793888e..680ca3331 100644 --- a/src/allmydata/web/filenode.py +++ b/src/allmydata/web/filenode.py @@ -1,23 +1,12 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 - # Use native unicode() as str() to prevent leaking futurebytes in ways that - # break string formattin. - from past.builtins import unicode as str -from past.builtins import long +from __future__ import annotations from twisted.web import http, static from twisted.internet import defer from twisted.web.resource import ( - Resource, # note: Resource is an old-style class + Resource, ErrorPage, ) @@ -34,6 +23,7 @@ from allmydata.blacklist import ( ) from allmydata.web.common import ( + get_keypair, boolean_of_arg, exception_to_child, get_arg, @@ -56,7 +46,6 @@ from allmydata.web.check_results import ( from allmydata.web.info import MoreInfo from allmydata.util import jsonbytes as json - class ReplaceMeMixin(object): def replace_me_with_a_child(self, req, client, replace): # a new file is being uploaded in our place. 
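To make the new keypair plumbing concrete, here is a hedged client-side sketch, not part of the patch itself, of producing the **private-key** value that get_keypair() decodes. The URL shape mirrors test_POST_upload_keypair earlier in this diff, and the helpers are the ones imported from allmydata.crypto.rsa; treat the endpoint string as illustrative.

# Sketch only: generate an RSA keypair, DER-encode the private half, and
# urlsafe-base64 it for use as the private-key query argument.
from base64 import urlsafe_b64encode
from allmydata.crypto.rsa import create_signing_keypair, der_string_from_signing_key

priv, pub = create_signing_keypair(2048)  # new 2048-bit RSA signature key
encoded_privkey = urlsafe_b64encode(der_string_from_signing_key(priv)).decode("ascii")
upload_url = f"/uri?t=upload&format=sdmf&private-key={encoded_privkey}"
# On the server side, get_keypair() reverses this with urlsafe_b64decode()
# and create_signing_keypair_from_string().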
@@ -64,7 +53,8 @@ class ReplaceMeMixin(object): mutable_type = get_mutable_type(file_format) if mutable_type is not None: data = MutableFileHandle(req.content) - d = client.create_mutable_file(data, version=mutable_type) + keypair = get_keypair(req) + d = client.create_mutable_file(data, version=mutable_type, unique_keypair=keypair) def _uploaded(newnode): d2 = self.parentnode.set_node(self.name, newnode, overwrite=replace) @@ -106,7 +96,8 @@ class ReplaceMeMixin(object): if file_format in ("SDMF", "MDMF"): mutable_type = get_mutable_type(file_format) uploadable = MutableFileHandle(contents.file) - d = client.create_mutable_file(uploadable, version=mutable_type) + keypair = get_keypair(req) + d = client.create_mutable_file(uploadable, version=mutable_type, unique_keypair=keypair) def _uploaded(newnode): d2 = self.parentnode.set_node(self.name, newnode, overwrite=replace) @@ -395,7 +386,7 @@ class FileDownloader(Resource, object): # list of (first,last) inclusive range tuples. filesize = self.filenode.get_size() - assert isinstance(filesize, (int,long)), filesize + assert isinstance(filesize, int), filesize try: # byte-ranges-specifier @@ -408,19 +399,19 @@ class FileDownloader(Resource, object): if first == '': # suffix-byte-range-spec - first = filesize - long(last) + first = filesize - int(last) last = filesize - 1 else: # byte-range-spec # first-byte-pos - first = long(first) + first = int(first) # last-byte-pos if last == '': last = filesize - 1 else: - last = long(last) + last = int(last) if last < first: raise ValueError @@ -456,7 +447,7 @@ class FileDownloader(Resource, object): b'attachment; filename="%s"' % self.filename) filesize = self.filenode.get_size() - assert isinstance(filesize, (int,long)), filesize + assert isinstance(filesize, int), filesize first, size = 0, None contentsize = filesize req.setHeader("accept-ranges", "bytes") diff --git a/src/allmydata/web/unlinked.py b/src/allmydata/web/unlinked.py index 425622496..2c7be6f30 100644 --- a/src/allmydata/web/unlinked.py +++ b/src/allmydata/web/unlinked.py @@ -1,14 +1,7 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations from urllib.parse import quote as urlquote @@ -25,6 +18,7 @@ from twisted.web.template import ( from allmydata.immutable.upload import FileHandle from allmydata.mutable.publish import MutableFileHandle from allmydata.web.common import ( + get_keypair, get_arg, boolean_of_arg, convert_children_json, @@ -48,7 +42,8 @@ def PUTUnlinkedSSK(req, client, version): # SDMF: files are small, and we can only upload data req.content.seek(0) data = MutableFileHandle(req.content) - d = client.create_mutable_file(data, version=version) + keypair = get_keypair(req) + d = client.create_mutable_file(data, version=version, unique_keypair=keypair) d.addCallback(lambda n: n.get_uri()) return d diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index 519b3e1f0..1b2b8192a 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -1,18 +1,12 @@ """ -Ported to Python 3. +General web server-related utilities. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations from six import ensure_str import re, time, tempfile +from urllib.parse import parse_qsl, urlencode from cgi import ( FieldStorage, @@ -45,40 +39,37 @@ from .web.storage_plugins import ( ) -if PY2: - FileUploadFieldStorage = FieldStorage -else: - class FileUploadFieldStorage(FieldStorage): - """ - Do terrible things to ensure files are still bytes. +class FileUploadFieldStorage(FieldStorage): + """ + Do terrible things to ensure files are still bytes. - On Python 2, uploaded files were always bytes. On Python 3, there's a - heuristic: if the filename is set on a field, it's assumed to be a file - upload and therefore bytes. If no filename is set, it's Unicode. + On Python 2, uploaded files were always bytes. On Python 3, there's a + heuristic: if the filename is set on a field, it's assumed to be a file + upload and therefore bytes. If no filename is set, it's Unicode. - Unfortunately, we always want it to be bytes, and Tahoe-LAFS also - enables setting the filename not via the MIME filename, but via a - separate field called "name". + Unfortunately, we always want it to be bytes, and Tahoe-LAFS also + enables setting the filename not via the MIME filename, but via a + separate field called "name". - Thus we need to do this ridiculous workaround. Mypy doesn't like it - either, thus the ``# type: ignore`` below. + Thus we need to do this ridiculous workaround. Mypy doesn't like it + either, thus the ``# type: ignore`` below. - Source for idea: - https://mail.python.org/pipermail/python-dev/2017-February/147402.html - """ - @property # type: ignore - def filename(self): - if self.name == "file" and not self._mime_filename: - # We use the file field to upload files, see directory.py's - # _POST_upload. Lack of _mime_filename means we need to trick - # FieldStorage into thinking there is a filename so it'll - # return bytes. - return "unknown-filename" - return self._mime_filename + Source for idea: + https://mail.python.org/pipermail/python-dev/2017-February/147402.html + """ + @property # type: ignore + def filename(self): + if self.name == "file" and not self._mime_filename: + # We use the file field to upload files, see directory.py's + # _POST_upload. Lack of _mime_filename means we need to trick + # FieldStorage into thinking there is a filename so it'll + # return bytes. + return "unknown-filename" + return self._mime_filename - @filename.setter - def filename(self, value): - self._mime_filename = value + @filename.setter + def filename(self, value): + self._mime_filename = value class TahoeLAFSRequest(Request, object): @@ -180,12 +171,7 @@ def _logFormatter(logDateTime, request): queryargs = b"" else: path, queryargs = x - # there is a form handler which redirects POST /uri?uri=FOO into - # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make - # sure we censor these too. - if queryargs.startswith(b"uri="): - queryargs = b"uri=[CENSORED]" - queryargs = b"?" + queryargs + queryargs = b"?" 
+ censor(queryargs) if path.startswith(b"/uri/"): path = b"/uri/[CENSORED]" elif path.startswith(b"/file/"): @@ -207,6 +193,30 @@ def _logFormatter(logDateTime, request): ) +def censor(queryargs: bytes) -> bytes: + """ + Replace potentially sensitive values in query arguments with a + constant string. + """ + args = parse_qsl(queryargs.decode("ascii"), keep_blank_values=True, encoding="utf8") + result = [] + for k, v in args: + if k == "uri": + # there is a form handler which redirects POST /uri?uri=FOO into + # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make + # sure we censor these. + v = "[CENSORED]" + elif k == "private-key": + # Likewise, sometimes a private key is supplied with mutable + # creation. + v = "[CENSORED]" + + result.append((k, v)) + + # Customize safe to try to leave our markers intact. + return urlencode(result, safe="[]").encode("ascii") + + class TahoeLAFSSite(Site, object): """ The HTTP protocol factory used by Tahoe-LAFS. diff --git a/tests.nix b/tests.nix index dd477c273..f8ed678f3 100644 --- a/tests.nix +++ b/tests.nix @@ -5,7 +5,7 @@ in { pkgsVersion ? "nixpkgs-21.11" , pkgs ? import sources.${pkgsVersion} { } , pypiData ? sources.pypi-deps-db -, pythonVersion ? "python37" +, pythonVersion ? "python39" , mach-nix ? import sources.mach-nix { inherit pkgs pypiData; python = pythonVersion; @@ -21,7 +21,7 @@ let inherit pkgs; lib = pkgs.lib; }; - tests_require = (mach-lib.extract "python37" ./. "extras_require" ).extras_require.test; + tests_require = (mach-lib.extract "python39" ./. "extras_require" ).extras_require.test; # Get the Tahoe-LAFS package itself. This does not include test # requirements and we don't ask for test requirements so that we can just diff --git a/tox.ini b/tox.ini index db4748033..3e2dacbb2 100644 --- a/tox.ini +++ b/tox.ini @@ -7,11 +7,9 @@ # the tox-gh-actions package. [gh-actions] python = - 3.7: py37-coverage,typechecks,codechecks 3.8: py38-coverage 3.9: py39-coverage 3.10: py310-coverage - pypy-3.7: pypy37 pypy-3.8: pypy38 pypy-3.9: pypy39 @@ -19,7 +17,7 @@ python = twisted = 1 [tox] -envlist = typechecks,codechecks,py{37,38,39,310}-{coverage},pypy27,pypy37,pypy38,pypy39,integration +envlist = typechecks,codechecks,py{38,39,310}-{coverage},pypy27,pypy38,pypy39,integration minversion = 2.4 [testenv] @@ -49,8 +47,6 @@ deps = # regressions in new releases of this package that cause us the kind of # suffering we're trying to avoid with the above pins. certifi - # VCS hooks support - py37,!coverage: pre-commit # We add usedevelop=False because testing against a true installation gives # more useful results.
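Finally, a sanity sketch of the censor() helper added above, with expected values taken from test_private_key_censoring: censor() receives the raw query string bytes (without the leading "?") and blanks both the uri and private-key values before the request line is logged.

from allmydata.webish import censor

assert (censor(b"uri=URI:CHK:aaa:bbb&private-key=AAAAaaaabbbb==")
        == b"uri=[CENSORED]&private-key=[CENSORED]")
# Other arguments survive, modulo URL re-encoding:
assert censor(b"t=json&uri=URI:DIR2:x:y") == b"t=json&uri=[CENSORED]"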