diff --git a/.circleci/config.yml b/.circleci/config.yml
index ab0573a3f..54b2706cd 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -11,22 +11,36 @@
 #
 version: 2.1
 
-# A template that can be shared between the two different image-building
+# Every job that pushes a Docker image from Docker Hub must authenticate to
+# it. Define a couple yaml anchors that can be used to supply the necessary
+# credentials.
+
+# First is a CircleCI job context which makes Docker Hub credentials available
+# in the environment.
+#
+# Contexts are managed in the CircleCI web interface:
+#
+#  https://app.circleci.com/settings/organization/github/tahoe-lafs/contexts
+dockerhub-context-template: &DOCKERHUB_CONTEXT
+  context: "dockerhub-auth"
+
+# Next is a Docker executor template that gets the credentials from the
+# environment and supplies them to the executor.
+dockerhub-auth-template: &DOCKERHUB_AUTH
+  - auth:
+      username: $DOCKERHUB_USERNAME
+      password: $DOCKERHUB_PASSWORD
+
+# A template that can be shared between the two different image-building
 # workflows.
 .images: &IMAGES
   jobs:
-    # Every job that pushes a Docker image from Docker Hub needs to provide
-    # credentials. Use this first job to define a yaml anchor that can be
-    # used to supply a CircleCI job context which makes Docker Hub credentials
-    # available in the environment.
-    #
-    # Contexts are managed in the CircleCI web interface:
-    #
-    #  https://app.circleci.com/settings/organization/github/tahoe-lafs/contexts
-    - "build-image-debian-11": &DOCKERHUB_CONTEXT
+    - "build-image-debian-11":
         <<: *DOCKERHUB_CONTEXT
     - "build-image-ubuntu-20-04":
         <<: *DOCKERHUB_CONTEXT
+    - "build-image-ubuntu-22-04":
+        <<: *DOCKERHUB_CONTEXT
     - "build-image-fedora-35":
         <<: *DOCKERHUB_CONTEXT
     - "build-image-oraclelinux-8":
@@ -66,17 +80,30 @@ workflows:
       - "ubuntu-20-04":
           {}
 
+      - "ubuntu-22-04":
+          {}
+
       # Equivalent to RHEL 8; CentOS 8 is dead.
       - "oraclelinux-8":
           {}
 
       - "nixos":
-          name: "NixOS 22.11"
+          name: "<<matrix.pythonVersion>>"
           nixpkgs: "22.11"
+          matrix:
+            parameters:
+              pythonVersion:
+                - "python38"
+                - "python39"
+                - "python310"
 
       - "nixos":
-          name: "NixOS unstable"
+          name: "<<matrix.pythonVersion>>"
           nixpkgs: "unstable"
+          matrix:
+            parameters:
+              pythonVersion:
+                - "python311"
 
       # Eventually, test against PyPy 3.8
       #- "pypy27-buster":
@@ -113,30 +140,7 @@ workflows:
       # Build as part of the workflow but only if requested.
       when: "<< pipeline.parameters.build-images >>"
 
 jobs:
-  dockerhub-auth-template:
-    # This isn't a real job. It doesn't get scheduled as part of any
-    # workflow. Instead, it's just a place we can hang a yaml anchor to
-    # finish the Docker Hub authentication configuration. Workflow jobs using
-    # the DOCKERHUB_CONTEXT anchor will have access to the environment
-    # variables used here. These variables will allow the Docker Hub image
-    # pull to be authenticated and hopefully avoid hitting and rate limits.
-    docker: &DOCKERHUB_AUTH
-      - image: "null"
-        auth:
-          username: $DOCKERHUB_USERNAME
-          password: $DOCKERHUB_PASSWORD
-
-    steps:
-      - run:
-          name: "CircleCI YAML schema conformity"
-          command: |
-            # This isn't a real command. We have to have something in this
-            # space, though, or the CircleCI yaml schema validator gets angry.
-            # Since this job is never scheduled this step is never run so the
-            # actual value here is irrelevant.
-
   codechecks:
     docker:
       - <<: *DOCKERHUB_AUTH
@@ -256,7 +260,7 @@ jobs:
           name: "Submit coverage results"
           command: |
             if [ -n "${UPLOAD_COVERAGE}" ]; then
-              /tmp/venv/bin/codecov
+              echo "TODO: Need a new coverage solution, see https://tahoe-lafs.org/trac/tahoe-lafs/ticket/4011"
             fi
 
   docker:
@@ -336,6 +340,16 @@ jobs:
       <<: *UTF_8_ENVIRONMENT
       TAHOE_LAFS_TOX_ENVIRONMENT: "py39"
 
+  ubuntu-22-04:
+    <<: *DEBIAN
+    docker:
+      - <<: *DOCKERHUB_AUTH
+        image: "tahoelafsci/ubuntu:22.04-py3.10"
+        user: "nobody"
+    environment:
+      <<: *UTF_8_ENVIRONMENT
+      TAHOE_LAFS_TOX_ENVIRONMENT: "py310"
+
   oraclelinux-8: &RHEL_DERIV
     docker:
       - <<: *DOCKERHUB_AUTH
@@ -374,56 +388,29 @@ jobs:
          Reference the name of a niv-managed nixpkgs source (see `niv show`
          and nix/sources.json)
        type: "string"
+      pythonVersion:
+        description: >-
+          Reference the name of a Python package in nixpkgs to use.
+        type: "string"
 
-    docker:
-      # Run in a highly Nix-capable environment.
-      - <<: *DOCKERHUB_AUTH
-        image: "nixos/nix:2.10.3"
-
-    environment:
-      # CACHIX_AUTH_TOKEN is manually set in the CircleCI web UI and
-      # allows us to push to CACHIX_NAME. We only need this set for
-      # `cachix use` in this step.
-      CACHIX_NAME: "tahoe-lafs-opensource"
+    executor: "nix"
 
     steps:
-      - "run":
-          # Get cachix for Nix-friendly caching.
-          name: "Install Basic Dependencies"
-          command: |
-            NIXPKGS="https://github.com/nixos/nixpkgs/archive/nixos-<<parameters.nixpkgs>>.tar.gz"
-            nix-env \
-              --file $NIXPKGS \
-              --install \
-              -A cachix bash
-            # Activate it for "binary substitution". This sets up
-            # configuration tht lets Nix download something from the cache
-            # instead of building it locally, if possible.
-            cachix use "${CACHIX_NAME}"
-
-      - "checkout"
-
-      - "run":
-          # The Nix package doesn't know how to do this part, unfortunately.
-          name: "Generate version"
-          command: |
-            nix-shell \
-              -p 'python3.withPackages (ps: [ ps.setuptools ])' \
-              --run 'python setup.py update_version'
-
-      - "run":
-          name: "Test"
-          command: |
-            # CircleCI build environment looks like it has a zillion and a
-            # half cores. Don't let Nix autodetect this high core count
-            # because it blows up memory usage and fails the test run. Pick a
-            # number of cores that suites the build environment we're paying
-            # for (the free one!).
-            source .circleci/lib.sh
-            cache_if_able nix-build \
-              --cores 8 \
-              --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>" \
-              nix/tests.nix
+      - "nix-build":
+          nixpkgs: "<<parameters.nixpkgs>>"
+          pythonVersion: "<<parameters.pythonVersion>>"
+          buildSteps:
+            - "run":
+                name: "Unit Test"
+                command: |
+                  # The dependencies are all built so we can allow more
+                  # parallelism here.
+                  source .circleci/lib.sh
+                  cache_if_able nix-build \
+                    --cores 8 \
+                    --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>" \
+                    --argstr pythonVersion "<<parameters.pythonVersion>>" \
+                    nix/tests.nix
 
   typechecks:
     docker:
@@ -509,6 +496,15 @@ jobs:
 
       PYTHON_VERSION: "3.9"
 
+  build-image-ubuntu-22-04:
+    <<: *BUILD_IMAGE
+
+    environment:
+      DISTRO: "ubuntu"
+      TAG: "22.04"
+      PYTHON_VERSION: "3.10"
+
+
   build-image-oraclelinux-8:
     <<: *BUILD_IMAGE
 
@@ -527,7 +523,6 @@ jobs:
 
   # build-image-pypy27-buster:
   #   <<: *BUILD_IMAGE
-
   #   environment:
   #     DISTRO: "pypy"
  #     TAG: "buster"
@@ -535,3 +530,87 @@ jobs:
 # #   setting up PyPy 3 in the image building toolchain. This value is just
 # #   for constructing the right Docker image tag.
 #   PYTHON_VERSION: "2"
+
+executors:
+  nix:
+    docker:
+      # Run in a highly Nix-capable environment.
+      - <<: *DOCKERHUB_AUTH
+        image: "nixos/nix:2.10.3"
+    environment:
+      # CACHIX_AUTH_TOKEN is manually set in the CircleCI web UI and allows us
+      # to push to CACHIX_NAME. CACHIX_NAME tells cachix which cache to push
+      # to.
+      CACHIX_NAME: "tahoe-lafs-opensource"
+
+commands:
+  nix-build:
+    parameters:
+      nixpkgs:
+        description: >-
+          Reference the name of a niv-managed nixpkgs source (see `niv show`
+          and nix/sources.json)
+        type: "string"
+      pythonVersion:
+        description: >-
+          Reference the name of a Python package in nixpkgs to use.
+        type: "string"
+      buildSteps:
+        description: >-
+          The build steps to execute after setting up the build environment.
+        type: "steps"
+
+    steps:
+      - "run":
+          # Get cachix for Nix-friendly caching.
+          name: "Install Basic Dependencies"
+          command: |
+            NIXPKGS="https://github.com/nixos/nixpkgs/archive/nixos-<<parameters.nixpkgs>>.tar.gz"
+            nix-env \
+              --file $NIXPKGS \
+              --install \
+              -A cachix bash
+            # Activate it for "binary substitution". This sets up
+            # configuration that lets Nix download something from the cache
+            # instead of building it locally, if possible.
+            cachix use "${CACHIX_NAME}"
+
+      - "checkout"
+
+      - "run":
+          # The Nix package doesn't know how to do this part, unfortunately.
+          name: "Generate version"
+          command: |
+            nix-shell \
+              -p 'python3.withPackages (ps: [ ps.setuptools ])' \
+              --run 'python setup.py update_version'
+
+      - "run":
+          name: "Build Dependencies"
+          command: |
+            # CircleCI build environment looks like it has a zillion and a
+            # half cores. Don't let Nix autodetect this high core count
+            # because it blows up memory usage and fails the test run. Pick a
+            # number of cores that suits the build environment we're paying
+            # for (the free one!).
+            source .circleci/lib.sh
+            # nix-shell will build all of the dependencies of the target but
+            # not the target itself.
+            cache_if_able nix-shell \
+              --run "" \
+              --cores 3 \
+              --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>" \
+              --argstr pythonVersion "<<parameters.pythonVersion>>" \
+              ./default.nix
+
+      - "run":
+          name: "Build Package"
+          command: |
+            source .circleci/lib.sh
+            cache_if_able nix-build \
+              --cores 4 \
+              --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>" \
+              --argstr pythonVersion "<<parameters.pythonVersion>>" \
+              ./default.nix
+
+      - steps: "<<parameters.buildSteps>>"
diff --git a/.circleci/create-virtualenv.sh b/.circleci/create-virtualenv.sh
index 810ce5ae2..7327d0859 100755
--- a/.circleci/create-virtualenv.sh
+++ b/.circleci/create-virtualenv.sh
@@ -47,3 +47,7 @@ export PIP_FIND_LINKS="file://${WHEELHOUSE_PATH}"
 # above, it may still not be able to get us a compatible version unless we
 # explicitly ask for one.
 "${PIP}" install --upgrade setuptools==44.0.0 wheel
+
+# Just about every user of this image wants to use tox from the bootstrap
+# virtualenv so go ahead and install it now.
+"${PIP}" install "tox~=3.0"
diff --git a/.circleci/populate-wheelhouse.sh b/.circleci/populate-wheelhouse.sh
index 857171979..239c8367b 100755
--- a/.circleci/populate-wheelhouse.sh
+++ b/.circleci/populate-wheelhouse.sh
@@ -3,18 +3,6 @@
 # https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
 set -euxo pipefail
 
-# Basic Python packages that you just need to have around to do anything,
-# practically speaking.
-BASIC_DEPS="pip wheel"
-
-# Python packages we need to support the test infrastructure. *Not* packages
-# Tahoe-LAFS itself (implementation or test suite) need.
-TEST_DEPS="tox~=3.0 codecov"
-
-# Python packages we need to generate test reports for CI infrastructure.
-# *Not* packages Tahoe-LAFS itself (implement or test suite) need.
-REPORTING_DEPS="python-subunit junitxml subunitreporter"
-
 # The filesystem location of the wheelhouse which we'll populate with wheels
 # for all of our dependencies.
WHEELHOUSE_PATH="$1" @@ -41,15 +29,5 @@ export PIP_FIND_LINKS="file://${WHEELHOUSE_PATH}" LANG="en_US.UTF-8" "${PIP}" \ wheel \ --wheel-dir "${WHEELHOUSE_PATH}" \ - "${PROJECT_ROOT}"[test] \ - ${BASIC_DEPS} \ - ${TEST_DEPS} \ - ${REPORTING_DEPS} - -# Not strictly wheelhouse population but ... Note we omit basic deps here. -# They're in the wheelhouse if Tahoe-LAFS wants to drag them in but it will -# have to ask. -"${PIP}" \ - install \ - ${TEST_DEPS} \ - ${REPORTING_DEPS} + "${PROJECT_ROOT}"[testenv] \ + "${PROJECT_ROOT}"[test] diff --git a/.circleci/run-tests.sh b/.circleci/run-tests.sh index 6d7a881fe..d897cc729 100755 --- a/.circleci/run-tests.sh +++ b/.circleci/run-tests.sh @@ -79,9 +79,10 @@ else alternative="false" fi +WORKDIR=/tmp/tahoe-lafs.tox ${TIMEOUT} ${BOOTSTRAP_VENV}/bin/tox \ -c ${PROJECT_ROOT}/tox.ini \ - --workdir /tmp/tahoe-lafs.tox \ + --workdir "${WORKDIR}" \ -e "${TAHOE_LAFS_TOX_ENVIRONMENT}" \ ${TAHOE_LAFS_TOX_ARGS} || "${alternative}" @@ -93,5 +94,6 @@ if [ -n "${ARTIFACTS}" ]; then # Create a junitxml results area. mkdir -p "$(dirname "${JUNITXML}")" - "${BOOTSTRAP_VENV}"/bin/subunit2junitxml < "${SUBUNIT2}" > "${JUNITXML}" || "${alternative}" + + "${WORKDIR}/${TAHOE_LAFS_TOX_ENVIRONMENT}/bin/subunit2junitxml" < "${SUBUNIT2}" > "${JUNITXML}" || "${alternative}" fi diff --git a/.circleci/setup-virtualenv.sh b/.circleci/setup-virtualenv.sh index feccbbf23..7087c5120 100755 --- a/.circleci/setup-virtualenv.sh +++ b/.circleci/setup-virtualenv.sh @@ -26,12 +26,7 @@ shift || : # Tell pip where it can find any existing wheels. export PIP_FIND_LINKS="file://${WHEELHOUSE_PATH}" - -# It is tempting to also set PIP_NO_INDEX=1 but (a) that will cause problems -# between the time dependencies change and the images are re-built and (b) the -# upcoming-deprecations job wants to install some dependencies from github and -# it's awkward to get that done any earlier than the tox run. So, we don't -# set it. +export PIP_NO_INDEX="1" # Get everything else installed in it, too. "${BOOTSTRAP_VENV}"/bin/tox \ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e006d90ac..1061657b9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -46,7 +46,6 @@ jobs: matrix: os: - windows-latest - - ubuntu-latest python-version: - "3.8" - "3.9" @@ -54,9 +53,9 @@ jobs: - "3.11" include: # On macOS don't bother with 3.8, just to get faster builds. - - os: macos-latest + - os: macos-12 python-version: "3.9" - - os: macos-latest + - os: macos-12 python-version: "3.11" # We only support PyPy on Linux at the moment. 
- os: ubuntu-latest @@ -80,7 +79,7 @@ jobs: - name: Install Python packages run: | - pip install --upgrade codecov "tox<4" tox-gh-actions setuptools + pip install --upgrade "tox<4" tox-gh-actions setuptools pip list - name: Display tool versions @@ -166,16 +165,16 @@ jobs: fail-fast: false matrix: include: - - os: macos-latest - python-version: "3.9" + - os: macos-12 + python-version: "3.11" force-foolscap: false - os: windows-latest - python-version: "3.9" + python-version: "3.11" force-foolscap: false # 22.04 has some issue with Tor at the moment: # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3943 - os: ubuntu-20.04 - python-version: "3.11" + python-version: "3.10" force-foolscap: false steps: @@ -249,7 +248,7 @@ jobs: fail-fast: false matrix: os: - - macos-10.15 + - macos-12 - windows-latest - ubuntu-latest python-version: diff --git a/.gitignore b/.gitignore index 7c7fa2afd..0cf688c54 100644 --- a/.gitignore +++ b/.gitignore @@ -53,3 +53,5 @@ zope.interface-*.egg # This is the plaintext of the private environment needed for some CircleCI # operations. It's never supposed to be checked in. secret-env-plain + +.ruff_cache \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 65b390f26..665b53178 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -1,5 +1,10 @@ version: 2 +build: + os: ubuntu-22.04 + tools: + python: "3.10" + python: install: - requirements: docs/requirements.txt diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 000000000..2dd6b59b5 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,18 @@ +select = [ + # Pyflakes checks + "F", + # Prohibit tabs: + "W191", + # No trailing whitespace: + "W291", + "W293", + # Make sure we bind closure variables in a loop (equivalent to pylint + # cell-var-from-loop): + "B023", + # Don't silence exceptions in finally by accident: + "B012", + # Don't use mutable default arguments: + "B006", + # Errors from PyLint: + "PLE", +] \ No newline at end of file diff --git a/default.nix b/default.nix index b87a6730a..d616f63b8 100644 --- a/default.nix +++ b/default.nix @@ -32,11 +32,7 @@ in }: with (pkgs.${pythonVersion}.override { - packageOverrides = self: super: { - # Some dependencies aren't packaged in nixpkgs so supply our own packages. - pycddl = self.callPackage ./nix/pycddl.nix { }; - txi2p = self.callPackage ./nix/txi2p.nix { }; - }; + packageOverrides = import ./nix/python-overrides.nix; }).pkgs; callPackage ./nix/tahoe-lafs.nix { # Select whichever package extras were requested. diff --git a/docs/performance.rst b/docs/performance.rst index 6ddeb1fe8..a0487c72c 100644 --- a/docs/performance.rst +++ b/docs/performance.rst @@ -82,8 +82,9 @@ network: A memory footprint: N/K*A -notes: Tahoe-LAFS generates a new RSA keypair for each mutable file that it -publishes to a grid. This takes up to 1 or 2 seconds on a typical desktop PC. +notes: +Tahoe-LAFS generates a new RSA keypair for each mutable file that it publishes to a grid. +This takes around 100 milliseconds on a relatively high-end laptop from 2021. Part of the process of encrypting, encoding, and uploading a mutable file to a Tahoe-LAFS grid requires that the entire file be in memory at once. 
For larger diff --git a/docs/proposed/http-storage-node-protocol.rst b/docs/proposed/http-storage-node-protocol.rst index aee201cf5..5009a992e 100644 --- a/docs/proposed/http-storage-node-protocol.rst +++ b/docs/proposed/http-storage-node-protocol.rst @@ -3,7 +3,7 @@ Storage Node Protocol ("Great Black Swamp", "GBS") ================================================== -The target audience for this document is Tahoe-LAFS developers. +The target audience for this document is developers working on Tahoe-LAFS or on an alternate implementation intended to be interoperable. After reading this document, one should expect to understand how Tahoe-LAFS clients interact over the network with Tahoe-LAFS storage nodes. @@ -64,6 +64,10 @@ Glossary lease renew secret a short secret string which storage servers required to be presented before allowing a particular lease to be renewed +The key words +"MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" +in this document are to be interpreted as described in RFC 2119. + Motivation ---------- @@ -119,8 +123,8 @@ An HTTP-based protocol can make use of TLS in largely the same way to provide th Provision of these properties *is* dependant on implementers following Great Black Swamp's rules for x509 certificate validation (rather than the standard "web" rules for validation). -Requirements ------------- +Design Requirements +------------------- Security ~~~~~~~~ @@ -189,6 +193,9 @@ Solutions An HTTP-based protocol, dubbed "Great Black Swamp" (or "GBS"), is described below. This protocol aims to satisfy the above requirements at a lower level of complexity than the current Foolscap-based protocol. +Summary (Non-normative) +~~~~~~~~~~~~~~~~~~~~~~~ + Communication with the storage node will take place using TLS. The TLS version and configuration will be dictated by an ongoing understanding of best practices. The storage node will present an x509 certificate during the TLS handshake. @@ -237,10 +244,10 @@ When Bob's client issues HTTP requests to Alice's storage node it includes the * .. note:: Foolscap TubIDs are 20 bytes (SHA1 digest of the certificate). - They are encoded with Base32 for a length of 32 bytes. + They are encoded with `Base32`_ for a length of 32 bytes. SPKI information discussed here is 32 bytes (SHA256 digest). - They would be encoded in Base32 for a length of 52 bytes. - `base64url`_ provides a more compact encoding of the information while remaining URL-compatible. + They would be encoded in `Base32`_ for a length of 52 bytes. + `unpadded base64url`_ provides a more compact encoding of the information while remaining URL-compatible. This would encode the SPKI information for a length of merely 43 bytes. SHA1, the current Foolscap hash function, @@ -329,15 +336,117 @@ and shares. A particular resource is addressed by the HTTP request path. Details about the interface are encoded in the HTTP message body. +String Encoding +~~~~~~~~~~~~~~~ + +.. _Base32: + +Base32 +!!!!!! + +Where the specification refers to Base32 the meaning is *unpadded* Base32 encoding as specified by `RFC 4648`_ using a *lowercase variation* of the alphabet from Section 6. + +That is, the alphabet is: + +.. 
list-table:: Base32 Alphabet
+   :header-rows: 1
+
+   * - Value
+     - Encoding
+     - Value
+     - Encoding
+     - Value
+     - Encoding
+     - Value
+     - Encoding
+
+   * - 0
+     - a
+     - 9
+     - j
+     - 18
+     - s
+     - 27
+     - 3
+   * - 1
+     - b
+     - 10
+     - k
+     - 19
+     - t
+     - 28
+     - 4
+   * - 2
+     - c
+     - 11
+     - l
+     - 20
+     - u
+     - 29
+     - 5
+   * - 3
+     - d
+     - 12
+     - m
+     - 21
+     - v
+     - 30
+     - 6
+   * - 4
+     - e
+     - 13
+     - n
+     - 22
+     - w
+     - 31
+     - 7
+   * - 5
+     - f
+     - 14
+     - o
+     - 23
+     - x
+     -
+     -
+   * - 6
+     - g
+     - 15
+     - p
+     - 24
+     - y
+     -
+     -
+   * - 7
+     - h
+     - 16
+     - q
+     - 25
+     - z
+     -
+     -
+   * - 8
+     - i
+     - 17
+     - r
+     - 26
+     - 2
+     -
+     -
+
 Message Encoding
 ~~~~~~~~~~~~~~~~
 
-The preferred encoding for HTTP message bodies is `CBOR`_.
-A request may be submitted using an alternate encoding by declaring this in the ``Content-Type`` header.
-A request may indicate its preference for an alternate encoding in the response using the ``Accept`` header.
-These two headers are used in the typical way for an HTTP application.
+Clients and servers MUST use the ``Content-Type`` and ``Accept`` header fields as specified in `RFC 9110`_ for message body negotiation.
 
-The only other encoding support for which is currently recommended is JSON.
+The encoding for HTTP message bodies SHOULD be `CBOR`_.
+Clients submitting requests using this encoding MUST include a ``Content-Type: application/cbor`` request header field.
+A request MAY be submitted using an alternate encoding by declaring this in the ``Content-Type`` header field.
+A request MAY indicate its preference for an alternate encoding in the response using the ``Accept`` header field.
+A request which includes no ``Accept`` header field MUST be interpreted in the same way as a request including an ``Accept: application/cbor`` header field.
+
+Clients and servers MAY support additional request and response message body encodings.
+
+Clients and servers SHOULD support ``application/json`` request and response message body encoding.
 For HTTP messages carrying binary share data,
 this is expected to be a particularly poor encoding.
 However,
@@ -350,10 +459,23 @@ Because of the simple types used throughout
 and the equivalence described in `RFC 7049`_
 these examples should be representative regardless of which of these two encodings is chosen.
 
-The one exception is sets.
-For CBOR messages, any sequence that is semantically a set (i.e. no repeated values allowed, order doesn't matter, and elements are hashable in Python) should be sent as a set.
-Tag 6.258 is used to indicate sets in CBOR; see `the CBOR registry <https://www.iana.org/assignments/cbor-tags/>`_ for more details.
-Sets will be represented as JSON lists in examples because JSON doesn't support sets.
+There are two exceptions to this rule.
+
+1. Sets
+!!!!!!!
+
+For CBOR messages,
+any sequence that is semantically a set (i.e. no repeated values allowed, order doesn't matter, and elements are hashable in Python) should be sent as a set.
+Tag 6.258 is used to indicate sets in CBOR;
+see `the CBOR registry <https://www.iana.org/assignments/cbor-tags/>`_ for more details.
+The JSON encoding does not support sets.
+Sets MUST be represented as arrays in JSON-encoded messages.
+
+2. Bytes
+!!!!!!!!
+
+The CBOR encoding natively supports a bytes type while the JSON encoding does not.
+Bytes MUST be represented as strings giving the `Base64`_ representation of the original bytes value.
 
 HTTP Design
 ~~~~~~~~~~~
 
@@ -368,29 +490,50 @@ one branch contains all of the share data;
 another branch contains all of the lease data;
 etc.
 
-An ``Authorization`` header in requests is required for all endpoints.
-The standard HTTP authorization protocol is used. -The authentication *type* used is ``Tahoe-LAFS``. -The swissnum from the NURL used to locate the storage service is used as the *credentials*. -If credentials are not presented or the swissnum is not associated with a storage service then no storage processing is performed and the request receives an ``401 UNAUTHORIZED`` response. +Clients and servers MUST use the ``Authorization`` header field, +as specified in `RFC 9110`_, +for authorization of all requests to all endpoints specified here. +The authentication *type* MUST be ``Tahoe-LAFS``. +Clients MUST present the `Base64`_-encoded representation of the swissnum from the NURL used to locate the storage service as the *credentials*. -There are also, for some endpoints, secrets sent via ``X-Tahoe-Authorization`` headers. -If these are: +If credentials are not presented or the swissnum is not associated with a storage service then the server MUST issue a ``401 UNAUTHORIZED`` response and perform no other processing of the message. + +Requests to certain endpoints MUST include additional secrets in the ``X-Tahoe-Authorization`` headers field. +The endpoints which require these secrets are: + +* ``PUT /storage/v1/lease/:storage_index``: + The secrets included MUST be ``lease-renew-secret`` and ``lease-cancel-secret``. + +* ``POST /storage/v1/immutable/:storage_index``: + The secrets included MUST be ``lease-renew-secret``, ``lease-cancel-secret``, and ``upload-secret``. + +* ``PATCH /storage/v1/immutable/:storage_index/:share_number``: + The secrets included MUST be ``upload-secret``. + +* ``PUT /storage/v1/immutable/:storage_index/:share_number/abort``: + The secrets included MUST be ``upload-secret``. + +* ``POST /storage/v1/mutable/:storage_index/read-test-write``: + The secrets included MUST be ``lease-renew-secret``, ``lease-cancel-secret``, and ``write-enabler``. + +If these secrets are: 1. Missing. 2. The wrong length. 3. Not the expected kind of secret. 4. They are otherwise unparseable before they are actually semantically used. -the server will respond with ``400 BAD REQUEST``. +the server MUST respond with ``400 BAD REQUEST`` and perform no other processing of the message. 401 is not used because this isn't an authorization problem, this is a "you sent garbage and should know better" bug. -If authorization using the secret fails, then a ``401 UNAUTHORIZED`` response should be sent. +If authorization using the secret fails, +then the server MUST send a ``401 UNAUTHORIZED`` response and perform no other processing of the message. Encoding ~~~~~~~~ -* ``storage_index`` should be base32 encoded (RFC3548) in URLs. +* ``storage_index`` MUST be `Base32`_ encoded in URLs. +* ``share_number`` MUST be a decimal representation General ~~~~~~~ @@ -398,21 +541,27 @@ General ``GET /storage/v1/version`` !!!!!!!!!!!!!!!!!!!!!!!!!!! -Retrieve information about the version of the storage server. -Information is returned as an encoded mapping. -For example:: +This endpoint allows clients to retrieve some basic metadata about a storage server from the storage service. +The response MUST validate against this CDDL schema:: + + {'http://allmydata.org/tahoe/protocols/storage/v1' => { + 'maximum-immutable-share-size' => uint + 'maximum-mutable-share-size' => uint + 'available-space' => uint + } + 'application-version' => bstr + } + +The server SHOULD populate as many fields as possible with accurate information about its behavior. 
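(A non-normative illustration of the authorization scheme specified above — a minimal Python sketch of how a client might build the header field; the swissnum value here is a placeholder, not a real credential.)::

    from base64 import b64encode

    # The swissnum is the secret extracted from the NURL used to locate the
    # storage service.
    swissnum = b"abcdefghijklmnop"  # placeholder value

    # The credentials are the Base64 encoding of the swissnum, presented
    # with the ``Tahoe-LAFS`` authentication *type*.
    headers = {
        "Authorization": b"Tahoe-LAFS " + b64encode(swissnum),
    }
    # e.g.  GET /storage/v1/version
    #       Authorization: Tahoe-LAFS YWJjZGVmZ2hpamtsbW5vcA==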
+ +For fields which relate to a specific API +the semantics are documented below in the section for that API. +For fields that are more general than a single API the semantics are as follows: + +* available-space: + The server SHOULD use this field to advertise the amount of space that it currently considers unused and is willing to allocate for client requests. + The value is a number of bytes. - { "http://allmydata.org/tahoe/protocols/storage/v1" : - { "maximum-immutable-share-size": 1234, - "maximum-mutable-share-size": 1235, - "available-space": 123456, - "tolerates-immutable-read-overrun": true, - "delete-mutable-shares-with-zero-length-writev": true, - "fills-holes-with-zero-bytes": true, - "prevents-read-past-end-of-share-data": true - }, - "application-version": "1.13.0" - } ``PUT /storage/v1/lease/:storage_index`` !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! @@ -471,21 +620,37 @@ Writing !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Initialize an immutable storage index with some buckets. -The buckets may have share data written to them once. -A lease is also created for the shares. +The server MUST allow share data to be written to the buckets at most one time. +The server MAY create a lease for the buckets. Details of the buckets to create are encoded in the request body. +The request body MUST validate against this CDDL schema:: + + { + share-numbers: #6.258([0*256 uint]) + allocated-size: uint + } + For example:: {"share-numbers": [1, 7, ...], "allocated-size": 12345} -The request must include ``X-Tahoe-Authorization`` HTTP headers that set the various secrets—upload, lease renewal, lease cancellation—that will be later used to authorize various operations. +The server SHOULD accept a value for **allocated-size** that is less than or equal to the lesser of the values of the server's version message's **maximum-immutable-share-size** or **available-space** values. + +The request MUST include ``X-Tahoe-Authorization`` HTTP headers that set the various secrets—upload, lease renewal, lease cancellation—that will be later used to authorize various operations. For example:: X-Tahoe-Authorization: lease-renew-secret X-Tahoe-Authorization: lease-cancel-secret X-Tahoe-Authorization: upload-secret -The response body includes encoded information about the created buckets. +The response body MUST include encoded information about the created buckets. +The response body MUST validate against this CDDL schema:: + + { + already-have: #6.258([0*256 uint]) + allocated: #6.258([0*256 uint]) + } + For example:: {"already-have": [1, ...], "allocated": [7, ...]} @@ -542,27 +707,35 @@ Rejected designs for upload secrets: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Write data for the indicated share. -The share number must belong to the storage index. -The request body is the raw share data (i.e., ``application/octet-stream``). -*Content-Range* requests are required; for large transfers this allows partially complete uploads to be resumed. +The share number MUST belong to the storage index. +The request body MUST be the raw share data (i.e., ``application/octet-stream``). +The request MUST include a *Content-Range* header field; +for large transfers this allows partially complete uploads to be resumed. + For example, a 1MiB share can be divided in to eight separate 128KiB chunks. Each chunk can be uploaded in a separate request. Each request can include a *Content-Range* value indicating its placement within the complete share. 
If any one of these requests fails then at most 128KiB of upload work needs to be retried.
 
-The server must recognize when all of the data has been received and mark the share as complete
+The server MUST recognize when all of the data has been received and mark the share as complete
 (which it can do because it was informed of the size when the storage index was initialized).
 
-The request must include a ``X-Tahoe-Authorization`` header that includes the upload secret::
+The request MUST include an ``X-Tahoe-Authorization`` header that includes the upload secret::
 
     X-Tahoe-Authorization: upload-secret
 
 Responses:
 
-* When a chunk that does not complete the share is successfully uploaded the response is ``OK``.
-  The response body indicates the range of share data that has yet to be uploaded.
-  That is::
+* When a chunk that does not complete the share is successfully uploaded the response MUST be ``OK``.
+  The response body MUST indicate the range of share data that has yet to be uploaded.
+  The response body MUST validate against this CDDL schema::
+
+    {
+      required: [0* {begin: uint, end: uint}]
+    }
+
+  For example::
 
     { "required":
       [ { "begin": 16384
        , "end": 32768
        }
      ,
      ...
      ]
    }
 
-* When the chunk that completes the share is successfully uploaded the response is ``CREATED``.
+* When the chunk that completes the share is successfully uploaded the response MUST be ``CREATED``.
 
 * If the *Content-Range* for a request covers part of the share that has already,
   and the data does not match already written data,
-  the response is ``CONFLICT``.
-  At this point the only thing to do is abort the upload and start from scratch (see below).
+  the response MUST be ``CONFLICT``.
+  In this case the client MUST abort the upload.
+  The client MAY then restart the upload from scratch.
 
 Discussion
 ``````````
@@ -603,34 +777,42 @@ From RFC 7231::
 
 This cancels an *in-progress* upload.
 
-The request must include a ``X-Tahoe-Authorization`` header that includes the upload secret::
+The request MUST include an ``X-Tahoe-Authorization`` header that includes the upload secret::
 
     X-Tahoe-Authorization: upload-secret
 
-The response code:
-
-* When the upload is still in progress and therefore the abort has succeeded,
-  the response is ``OK``.
-  Future uploads can start from scratch with no pre-existing upload state stored on the server.
-* If the uploaded has already finished, the response is 405 (Method Not Allowed)
-  and no change is made.
+If there is an incomplete upload with a matching upload-secret then the server MUST consider the abort to have succeeded.
+In this case the response MUST be ``OK``.
+The server MUST respond to all future requests as if the operations related to this upload did not take place.
+
+If there is no incomplete upload with a matching upload-secret then the server MUST respond with ``Method Not Allowed`` (405).
+The server MUST make no client-visible changes to its state in this case.
 
 
 ``POST /storage/v1/immutable/:storage_index/:share_number/corrupt``
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
-Advise the server the data read from the indicated share was corrupt. The
-request body includes an human-meaningful text string with details about the
-corruption. It also includes potentially important details about the share.
+Advise the server the data read from the indicated share was corrupt.
+The request body includes a human-meaningful text string with details about the corruption.
+It also includes potentially important details about the share.
+The request body MUST validate against this CDDL schema::
+
+  {
+    reason: tstr .size (1..32765)
+  }
 
 For example::
 
   {"reason": "expected hash abcd, got hash efgh"}
 
-.. share-type, storage-index, and share-number are inferred from the URL
+The report pertains to the immutable share with a **storage index** and **share number** given in the request path.
+If the identified **storage index** and **share number** are known to the server then the report SHOULD be accepted and made available to server administrators.
+In this case the response SHOULD be ``OK``.
+If the report is not accepted then the response SHOULD be ``Not Found`` (404).
 
-The response code is OK (200) by default, or NOT FOUND (404) if the share
-couldn't be found.
+Discussion
+``````````
+
+The seemingly odd length limit on ``reason`` is chosen so that the *encoded* representation of the message is limited to 32768 bytes.
 
 Reading
 ~~~~~~~
 
@@ -638,26 +820,36 @@ Reading
 ``GET /storage/v1/immutable/:storage_index/shares``
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
-Retrieve a list (semantically, a set) indicating all shares available for the
-indicated storage index. For example::
+Retrieve a list (semantically, a set) indicating all shares available for the indicated storage index.
+The response body MUST validate against this CDDL schema::
+
+    #6.258([0*256 uint])
+
+For example::
 
     [1, 5]
 
-An unknown storage index results in an empty list.
+If the **storage index** in the request path is not known to the server then the response MUST include an empty list.
 
 ``GET /storage/v1/immutable/:storage_index/:share_number``
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 Read a contiguous sequence of bytes from one share in one bucket.
-The response body is the raw share data (i.e., ``application/octet-stream``).
-The ``Range`` header may be used to request exactly one ``bytes`` range, in which case the response code will be 206 (partial content).
-Interpretation and response behavior is as specified in RFC 7233 § 4.1.
-Multiple ranges in a single request are *not* supported; open-ended ranges are also not supported.
+The response body MUST be the raw share data (i.e., ``application/octet-stream``).
+The ``Range`` header MAY be used to request exactly one ``bytes`` range,
+in which case the response code MUST be ``Partial Content`` (206).
+Interpretation and response behavior MUST be as specified in RFC 7233 § 4.1.
+Multiple ranges in a single request are *not* supported;
+open-ended ranges are also not supported.
+Clients MUST NOT send requests using these features.
 
-If the response reads beyond the end of the data, the response may be shorter than the requested range.
-The resulting ``Content-Range`` header will be consistent with the returned data.
+If the response reads beyond the end of the data,
+the response MUST be shorter than the requested range.
+It MUST contain all data up to the end of the share and then end.
+The resulting ``Content-Range`` header MUST be consistent with the returned data.
 
-If the response to a query is an empty range, the ``NO CONTENT`` (204) response code will be used.
+If the response to a query is an empty range,
+the server MUST send a ``No Content`` (204) response.
 
 Discussion
 ``````````
@@ -696,13 +888,27 @@ The first write operation on a mutable storage index creates it
 (that is,
 there is no separate "create this storage index" operation as there is for the
 immutable storage index type).
-The request must include ``X-Tahoe-Authorization`` headers with write enabler and lease secrets:: +The request MUST include ``X-Tahoe-Authorization`` headers with write enabler and lease secrets:: X-Tahoe-Authorization: write-enabler X-Tahoe-Authorization: lease-cancel-secret X-Tahoe-Authorization: lease-renew-secret -The request body includes test, read, and write vectors for the operation. +The request body MUST include test, read, and write vectors for the operation. +The request body MUST validate against this CDDL schema:: + + { + "test-write-vectors": { + 0*256 share_number : { + "test": [0*30 {"offset": uint, "size": uint, "specimen": bstr}] + "write": [* {"offset": uint, "data": bstr}] + "new-length": uint / null + } + } + "read-vector": [0*30 {"offset": uint, "size": uint}] + } + share_number = uint + For example:: { @@ -725,6 +931,14 @@ For example:: The response body contains a boolean indicating whether the tests all succeed (and writes were applied) and a mapping giving read data (pre-write). +The response body MUST validate against this CDDL schema:: + + { + "success": bool, + "data": {0*256 share_number: [0* bstr]} + } + share_number = uint + For example:: { @@ -736,8 +950,17 @@ For example:: } } -A test vector or read vector that read beyond the boundaries of existing data will return nothing for any bytes past the end. -As a result, if there is no data at all, an empty bytestring is returned no matter what the offset or length. +A client MAY send a test vector or read vector to bytes beyond the end of existing data. +In this case a server MUST behave as if the test or read vector referred to exactly as much data exists. + +For example, +consider the case where the server has 5 bytes of data for a particular share. +If a client sends a read vector with an ``offset`` of 1 and a ``size`` of 4 then the server MUST respond with all of the data except the first byte. +If a client sends a read vector with the same ``offset`` and a ``size`` of 5 (or any larger value) then the server MUST respond in the same way. + +Similarly, +if there is no data at all, +an empty byte string is returned no matter what the offset or length. Reading ~~~~~~~ @@ -746,23 +969,34 @@ Reading !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Retrieve a set indicating all shares available for the indicated storage index. -For example (this is shown as list, since it will be list for JSON, but will be set for CBOR):: +The response body MUST validate against this CDDL schema:: + + #6.258([0*256 uint]) + +For example:: [1, 5] ``GET /storage/v1/mutable/:storage_index/:share_number`` -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -Read data from the indicated mutable shares, just like ``GET /storage/v1/immutable/:storage_index`` +Read data from the indicated mutable shares, just like ``GET /storage/v1/immutable/:storage_index``. -The ``Range`` header may be used to request exactly one ``bytes`` range, in which case the response code will be 206 (partial content). -Interpretation and response behavior is as specified in RFC 7233 § 4.1. -Multiple ranges in a single request are *not* supported; open-ended ranges are also not supported. +The response body MUST be the raw share data (i.e., ``application/octet-stream``). +The ``Range`` header MAY be used to request exactly one ``bytes`` range, +in which case the response code MUST be ``Partial Content`` (206). 
+Interpretation and response behavior MUST be specified in RFC 7233 § 4.1. +Multiple ranges in a single request are *not* supported; +open-ended ranges are also not supported. +Clients MUST NOT send requests using these features. -If the response reads beyond the end of the data, the response may be shorter than the requested range. -The resulting ``Content-Range`` header will be consistent with the returned data. +If the response reads beyond the end of the data, +the response MUST be shorter than the requested range. +It MUST contain all data up to the end of the share and then end. +The resulting ``Content-Range`` header MUST be consistent with the returned data. -If the response to a query is an empty range, the ``NO CONTENT`` (204) response code will be used. +If the response to a query is an empty range, +the server MUST send a ``No Content`` (204) response. ``POST /storage/v1/mutable/:storage_index/:share_number/corrupt`` @@ -774,6 +1008,9 @@ Just like the immutable version. Sample Interactions ------------------- +This section contains examples of client/server interactions to help illuminate the above specification. +This section is non-normative. + Immutable Data ~~~~~~~~~~~~~~ @@ -926,10 +1163,16 @@ otherwise it will read a byte which won't match `b""`:: 204 NO CONTENT +.. _Base64: https://www.rfc-editor.org/rfc/rfc4648#section-4 + +.. _RFC 4648: https://tools.ietf.org/html/rfc4648 + .. _RFC 7469: https://tools.ietf.org/html/rfc7469#section-2.4 .. _RFC 7049: https://tools.ietf.org/html/rfc7049#section-4 +.. _RFC 9110: https://tools.ietf.org/html/rfc9110 + .. _CBOR: http://cbor.io/ .. [#] @@ -974,7 +1217,7 @@ otherwise it will read a byte which won't match `b""`:: spki_encoded = urlsafe_b64encode(spki_sha256) assert spki_encoded == tub_id - Note we use `base64url`_ rather than the Foolscap- and Tahoe-LAFS-preferred Base32. + Note we use `unpadded base64url`_ rather than the Foolscap- and Tahoe-LAFS-preferred Base32. .. [#] https://www.cvedetails.com/cve/CVE-2017-5638/ @@ -985,6 +1228,6 @@ otherwise it will read a byte which won't match `b""`:: .. [#] https://efail.de/ -.. _base64url: https://tools.ietf.org/html/rfc7515#appendix-C +.. _unpadded base64url: https://tools.ietf.org/html/rfc7515#appendix-C .. _attacking SHA1: https://en.wikipedia.org/wiki/SHA-1#Attacks diff --git a/docs/specifications/dirnodes.rst b/docs/specifications/dirnodes.rst index 88fcd0fa9..c53d28a26 100644 --- a/docs/specifications/dirnodes.rst +++ b/docs/specifications/dirnodes.rst @@ -267,7 +267,7 @@ How well does this design meet the goals? value, so there are no opportunities for staleness 9. monotonicity: VERY: the single point of access also protects against retrograde motion - + Confidentiality leaks in the storage servers @@ -332,8 +332,9 @@ MDMF design rules allow for efficient random-access reads from the middle of the file, which would give the index something useful to point at. The current SDMF design generates a new RSA public/private keypair for each -directory. This takes considerable time and CPU effort, generally one or two -seconds per directory. We have designed (but not yet built) a DSA-based +directory. This takes some time and CPU effort (around 100 milliseconds on a +relatively high-end 2021 laptop) per directory. +We have designed (but not yet built) a DSA-based mutable file scheme which will use shared parameters to reduce the directory-creation effort to a bare minimum (picking a random number instead of generating two random primes). 
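(The ~100 millisecond RSA keypair figure that both documentation fixes above now cite is easy to sanity-check. A minimal, non-normative sketch, assuming the ``cryptography`` package and the 2048-bit RSA keys Tahoe-LAFS uses for mutable files.)::

    import time
    from cryptography.hazmat.primitives.asymmetric import rsa

    start = time.monotonic()
    rsa.generate_private_key(public_exponent=65537, key_size=2048)
    elapsed_ms = (time.monotonic() - start) * 1000
    # Key generation time varies considerably between runs; average several
    # runs before comparing against the documented estimate.
    print(f"RSA keypair generated in {elapsed_ms:.0f} ms")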
@@ -363,7 +364,7 @@ single child, looking up a single child) would require pulling or pushing a lot of unrelated data, increasing network overhead (and necessitating test-and-set semantics for the modification side, which increases the chances that a user operation will fail, making it more challenging to provide -promises of atomicity to the user). +promises of atomicity to the user). It would also make it much more difficult to enable the delegation ("sharing") of specific directories. Since each aggregate "realm" provides @@ -469,4 +470,3 @@ Preventing delegation between communication parties is just as pointless as asking Bob to forget previously accessed files. However, there may be value to configuring the UI to ask Carol to not share files with Bob, or to removing all files from Bob's view at the same time his access is revoked. - diff --git a/integration/conftest.py b/integration/conftest.py index 331b1e7e3..643295291 100644 --- a/integration/conftest.py +++ b/integration/conftest.py @@ -1,6 +1,10 @@ """ Ported to Python 3. """ + +from __future__ import annotations + +import os import sys import shutil from time import sleep @@ -14,6 +18,7 @@ from eliot import ( log_call, ) +from twisted.python.filepath import FilePath from twisted.python.procutils import which from twisted.internet.defer import DeferredList from twisted.internet.error import ( @@ -41,7 +46,16 @@ from .grid import ( create_flog_gatherer, create_grid, ) +from allmydata.node import read_config +# No reason for HTTP requests to take longer than four minutes in the +# integration tests. See allmydata/scripts/common_http.py for usage. +os.environ["__TAHOE_CLI_HTTP_TIMEOUT"] = "240" + +# Make Foolscap logging go into Twisted logging, so that integration test logs +# include extra information +# (https://github.com/warner/foolscap/blob/latest-release/doc/logging.rst): +os.environ["FLOGTOTWISTED"] = "1" # pytest customization hooks @@ -108,7 +122,7 @@ def port_allocator(reactor): @pytest.fixture(scope='session') @log_call(action_type=u"integration:temp_dir", include_args=[]) -def temp_dir(request): +def temp_dir(request) -> str: """ Invoke like 'py.test --keep-tempdir ...' to avoid deleting the temp-dir """ @@ -166,22 +180,16 @@ def introducer_furl(introducer, temp_dir): return introducer.furl -@pytest.fixture(scope='session') +@pytest.fixture @log_call( action_type=u"integration:tor:introducer", include_args=["temp_dir", "flog_gatherer"], include_result=False, ) def tor_introducer(reactor, temp_dir, flog_gatherer, request): - config = ''' -[node] -nickname = introducer_tor -web.port = 4561 -log_gatherer.furl = {log_furl} -'''.format(log_furl=flog_gatherer) - intro_dir = join(temp_dir, 'introducer_tor') - print("making introducer", intro_dir) + print("making Tor introducer in {}".format(intro_dir)) + print("(this can take tens of seconds to allocate Onion address)") if not exists(intro_dir): mkdir(intro_dir) @@ -192,20 +200,25 @@ log_gatherer.furl = {log_furl} request, ( 'create-introducer', - '--tor-control-port', 'tcp:localhost:8010', + # The control port should agree with the configuration of the + # Tor network we bootstrap with chutney. 
+ '--tor-control-port', 'tcp:localhost:8007', + '--hide-ip', '--listen=tor', intro_dir, ), ) pytest_twisted.blockon(done_proto.done) - # over-write the config file with our stuff - with open(join(intro_dir, 'tahoe.cfg'), 'w') as f: - f.write(config) + # adjust a few settings + config = read_config(intro_dir, "tub.port") + config.set_config("node", "nickname", "introducer-tor") + config.set_config("node", "web.port", "4561") + config.set_config("node", "log_gatherer.furl", flog_gatherer) # "tahoe run" is consistent across Linux/macOS/Windows, unlike the old # "start" command. - protocol = _MagicTextProtocol('introducer running') + protocol = _MagicTextProtocol('introducer running', "tor_introducer") transport = _tahoe_runner_optional_coverage( protocol, reactor, @@ -224,17 +237,20 @@ log_gatherer.furl = {log_furl} pass request.addfinalizer(cleanup) + print("Waiting for introducer to be ready...") pytest_twisted.blockon(protocol.magic_seen) + print("Introducer ready.") return transport -@pytest.fixture(scope='session') +@pytest.fixture def tor_introducer_furl(tor_introducer, temp_dir): furl_fname = join(temp_dir, 'introducer_tor', 'private', 'introducer.furl') while not exists(furl_fname): print("Don't see {} yet".format(furl_fname)) sleep(.1) furl = open(furl_fname, 'r').read() + print(f"Found Tor introducer furl: {furl} in {furl_fname}") return furl @@ -278,12 +294,9 @@ def alice( reactor, request, temp_dir, introducer_furl, flog_gatherer, "alice", web_port="tcp:9980:interface=localhost", storage=False, - # We're going to kill this ourselves, so no need for finalizer to - # do it: - finalize=False, ) ) - await_client_ready(process) + pytest_twisted.blockon(await_client_ready(process)) # 1. Create a new RW directory cap: cli(process, "create-alias", "test") @@ -314,7 +327,7 @@ alice-key ssh-rsa {ssh_public_key} {rwcap} # 4. Restart the node with new SFTP config. pytest_twisted.blockon(process.restart_async(reactor, request)) - await_client_ready(process) + pytest_twisted.blockon(await_client_ready(process)) print(f"Alice pid: {process.transport.pid}") return process @@ -329,22 +342,37 @@ def bob(reactor, temp_dir, introducer_furl, flog_gatherer, storage_nodes, reques storage=False, ) ) - await_client_ready(process) + pytest_twisted.blockon(await_client_ready(process)) return process @pytest.fixture(scope='session') @pytest.mark.skipif(sys.platform.startswith('win'), 'Tor tests are unstable on Windows') -def chutney(reactor, temp_dir): +def chutney(reactor, temp_dir: str) -> tuple[str, dict[str, str]]: + # Try to find Chutney already installed in the environment. + try: + import chutney + except ImportError: + # Nope, we'll get our own in a moment. + pass + else: + # We already have one, just use it. + return ( + # from `checkout/lib/chutney/__init__.py` we want to get back to + # `checkout` because that's the parent of the directory with all + # of the network definitions. So, great-grand-parent. + FilePath(chutney.__file__).parent().parent().parent().path, + # There's nothing to add to the environment. + {}, + ) chutney_dir = join(temp_dir, 'chutney') mkdir(chutney_dir) - # TODO: - - # check for 'tor' binary explicitly and emit a "skip" if we can't - # find it + missing = [exe for exe in ["tor", "tor-gencert"] if not which(exe)] + if missing: + pytest.skip(f"Some command-line tools not found: {missing}") # XXX yuck! 
should add a setup.py to chutney so we can at least # "pip install " and/or depend on chutney in "pip @@ -357,7 +385,7 @@ def chutney(reactor, temp_dir): 'git', ( 'git', 'clone', - 'https://git.torproject.org/chutney.git', + 'https://gitlab.torproject.org/tpo/core/chutney.git', chutney_dir, ), env=environ, @@ -373,94 +401,68 @@ def chutney(reactor, temp_dir): ( 'git', '-C', chutney_dir, 'reset', '--hard', - 'c825cba0bcd813c644c6ac069deeb7347d3200ee' + 'c4f6789ad2558dcbfeb7d024c6481d8112bfb6c2' ), env=environ, ) pytest_twisted.blockon(proto.done) - return chutney_dir + return (chutney_dir, {"PYTHONPATH": join(chutney_dir, "lib")}) @pytest.fixture(scope='session') @pytest.mark.skipif(sys.platform.startswith('win'), reason='Tor tests are unstable on Windows') def tor_network(reactor, temp_dir, chutney, request): + """ + Build a basic Tor network. - # this is the actual "chutney" script at the root of a chutney checkout - chutney_dir = chutney - chut = join(chutney_dir, 'chutney') + :param chutney: The root directory of a Chutney checkout and a dict of + additional environment variables to set so a Python process can use + it. - # now, as per Chutney's README, we have to create the network - # ./chutney configure networks/basic - # ./chutney start networks/basic + :return: None + """ + chutney_root, chutney_env = chutney + basic_network = join(chutney_root, 'networks', 'basic') env = environ.copy() - env.update({"PYTHONPATH": join(chutney_dir, "lib")}) - proto = _DumpOutputProtocol(None) - reactor.spawnProcess( - proto, - sys.executable, - ( - sys.executable, '-m', 'chutney.TorNet', 'configure', - join(chutney_dir, 'networks', 'basic'), - ), - path=join(chutney_dir), - env=env, - ) - pytest_twisted.blockon(proto.done) - - proto = _DumpOutputProtocol(None) - reactor.spawnProcess( - proto, - sys.executable, - ( - sys.executable, '-m', 'chutney.TorNet', 'start', - join(chutney_dir, 'networks', 'basic'), - ), - path=join(chutney_dir), - env=env, - ) - pytest_twisted.blockon(proto.done) - - # print some useful stuff - proto = _CollectOutputProtocol() - reactor.spawnProcess( - proto, - sys.executable, - ( - sys.executable, '-m', 'chutney.TorNet', 'status', - join(chutney_dir, 'networks', 'basic'), - ), - path=join(chutney_dir), - env=env, - ) - try: - pytest_twisted.blockon(proto.done) - except ProcessTerminated: - print("Chutney.TorNet status failed (continuing):") - print(proto.output.getvalue()) - - def cleanup(): - print("Tearing down Chutney Tor network") - proto = _CollectOutputProtocol() + env.update(chutney_env) + env.update({ + # default is 60, probably too short for reliable automated use. + "CHUTNEY_START_TIME": "600", + }) + chutney_argv = (sys.executable, '-m', 'chutney.TorNet') + def chutney(argv): + proto = _DumpOutputProtocol(None) reactor.spawnProcess( proto, sys.executable, - ( - sys.executable, '-m', 'chutney.TorNet', 'stop', - join(chutney_dir, 'networks', 'basic'), - ), - path=join(chutney_dir), + chutney_argv + argv, + path=join(chutney_root), env=env, ) + return proto.done + + # now, as per Chutney's README, we have to create the network + pytest_twisted.blockon(chutney(("configure", basic_network))) + + # before we start the network, ensure we will tear down at the end + def cleanup(): + print("Tearing down Chutney Tor network") try: - block_with_timeout(proto.done, reactor) + block_with_timeout(chutney(("stop", basic_network)), reactor) except ProcessTerminated: # If this doesn't exit cleanly, that's fine, that shouldn't fail # the test suite. 
pass - request.addfinalizer(cleanup) - return chut + pytest_twisted.blockon(chutney(("start", basic_network))) + pytest_twisted.blockon(chutney(("wait_for_bootstrap", basic_network))) + + # print some useful stuff + try: + pytest_twisted.blockon(chutney(("status", basic_network))) + except ProcessTerminated: + print("Chutney.TorNet status failed (continuing)") diff --git a/integration/grid.py b/integration/grid.py index 8c7e7624b..ec8b1e0e0 100644 --- a/integration/grid.py +++ b/integration/grid.py @@ -99,7 +99,7 @@ def create_flog_gatherer(reactor, request, temp_dir, flog_binary): ) yield out_protocol.done - twistd_protocol = _MagicTextProtocol("Gatherer waiting at") + twistd_protocol = _MagicTextProtocol("Gatherer waiting at", "gatherer") twistd_process = reactor.spawnProcess( twistd_protocol, which('twistd')[0], @@ -341,7 +341,7 @@ def create_introducer(reactor, request, temp_dir, flog_gatherer, port): # on windows, "tahoe start" means: run forever in the foreground, # but on linux it means daemonize. "tahoe run" is consistent # between platforms. - protocol = _MagicTextProtocol('introducer running') + protocol = _MagicTextProtocol('introducer running', "introducer") transport = _tahoe_runner_optional_coverage( protocol, reactor, diff --git a/integration/test_get_put.py b/integration/test_get_put.py index 1b6c30072..e30a34f97 100644 --- a/integration/test_get_put.py +++ b/integration/test_get_put.py @@ -4,11 +4,11 @@ and stdout. """ from subprocess import Popen, PIPE, check_output, check_call -import sys import pytest -from pytest_twisted import ensureDeferred from twisted.internet import reactor +from twisted.internet.threads import blockingCallFromThread +from twisted.internet.defer import Deferred from .util import run_in_thread, cli, reconfigure @@ -50,6 +50,7 @@ def test_put_from_stdin(alice, get_put_alias, tmpdir): assert read_bytes(tempfile) == DATA +@run_in_thread def test_get_to_stdout(alice, get_put_alias, tmpdir): """ It's possible to upload a file, and then download it to stdout. @@ -67,6 +68,7 @@ def test_get_to_stdout(alice, get_put_alias, tmpdir): assert p.wait() == 0 +@run_in_thread def test_large_file(alice, get_put_alias, tmp_path): """ It's possible to upload and download a larger file. @@ -85,12 +87,8 @@ def test_large_file(alice, get_put_alias, tmp_path): assert outfile.read_bytes() == tempfile.read_bytes() -@pytest.mark.skipif( - sys.platform.startswith("win"), - reason="reconfigure() has issues on Windows" -) -@ensureDeferred -async def test_upload_download_immutable_different_default_max_segment_size(alice, get_put_alias, tmpdir, request): +@run_in_thread +def test_upload_download_immutable_different_default_max_segment_size(alice, get_put_alias, tmpdir, request): """ Tahoe-LAFS used to have a default max segment size of 128KB, and is now 1MB. Test that an upload created when 128KB was the default can be @@ -103,22 +101,25 @@ async def test_upload_download_immutable_different_default_max_segment_size(alic with tempfile.open("wb") as f: f.write(large_data) - async def set_segment_size(segment_size): - await reconfigure( + def set_segment_size(segment_size): + return blockingCallFromThread( reactor, - request, - alice, - (1, 1, 1), - None, - max_segment_size=segment_size - ) + lambda: Deferred.fromCoroutine(reconfigure( + reactor, + request, + alice, + (1, 1, 1), + None, + max_segment_size=segment_size + )) + ) # 1. 
Upload file 1 with default segment size set to 1MB - await set_segment_size(1024 * 1024) + set_segment_size(1024 * 1024) cli(alice, "put", str(tempfile), "getput:seg1024kb") # 2. Download file 1 with default segment size set to 128KB - await set_segment_size(128 * 1024) + set_segment_size(128 * 1024) assert large_data == check_output( ["tahoe", "--node-directory", alice.node_dir, "get", "getput:seg1024kb", "-"] ) @@ -127,7 +128,7 @@ async def test_upload_download_immutable_different_default_max_segment_size(alic cli(alice, "put", str(tempfile), "getput:seg128kb") # 4. Download file 2 with default segment size set to 1MB - await set_segment_size(1024 * 1024) + set_segment_size(1024 * 1024) assert large_data == check_output( ["tahoe", "--node-directory", alice.node_dir, "get", "getput:seg128kb", "-"] ) diff --git a/integration/test_i2p.py b/integration/test_i2p.py index 2deb01fab..2ee603573 100644 --- a/integration/test_i2p.py +++ b/integration/test_i2p.py @@ -2,26 +2,11 @@ Integration tests for I2P support. """ -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - import sys from os.path import join, exists -from os import mkdir +from os import mkdir, environ from time import sleep - -if PY2: - def which(path): - # This will result in skipping I2P tests on Python 2. Oh well. - return None -else: - from shutil import which +from shutil import which from eliot import log_call @@ -38,6 +23,8 @@ from twisted.internet.error import ProcessExitedAlready from allmydata.test.common import ( write_introducer, ) +from allmydata.node import read_config + if which("docker") is None: pytest.skip('Skipping I2P tests since Docker is unavailable', allow_module_level=True) @@ -50,7 +37,7 @@ if sys.platform.startswith('win'): @pytest.fixture def i2p_network(reactor, temp_dir, request): """Fixture to start up local i2pd.""" - proto = util._MagicTextProtocol("ephemeral keys") + proto = util._MagicTextProtocol("ephemeral keys", "i2pd") reactor.spawnProcess( proto, which("docker"), @@ -62,6 +49,7 @@ def i2p_network(reactor, temp_dir, request): "--log=stdout", "--loglevel=info" ), + env=environ, ) def cleanup(): @@ -82,13 +70,6 @@ def i2p_network(reactor, temp_dir, request): include_result=False, ) def i2p_introducer(reactor, temp_dir, flog_gatherer, request): - config = ''' -[node] -nickname = introducer_i2p -web.port = 4561 -log_gatherer.furl = {log_furl} -'''.format(log_furl=flog_gatherer) - intro_dir = join(temp_dir, 'introducer_i2p') print("making introducer", intro_dir) @@ -108,12 +89,14 @@ log_gatherer.furl = {log_furl} pytest_twisted.blockon(done_proto.done) # over-write the config file with our stuff - with open(join(intro_dir, 'tahoe.cfg'), 'w') as f: - f.write(config) + config = read_config(intro_dir, "tub.port") + config.set_config("node", "nickname", "introducer_i2p") + config.set_config("node", "web.port", "4563") + config.set_config("node", "log_gatherer.furl", flog_gatherer) # "tahoe run" is consistent across Linux/macOS/Windows, unlike the old # "start" command. 
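The introducer fixture now edits tahoe.cfg through read_config/set_config instead of overwriting the whole file, so options generated by `tahoe create-introducer` survive. Roughly, set_config behaves like this standard-library sketch (illustrative only; the real implementation in allmydata.node also validates sections and options):

    # Illustrative sketch: change one option while preserving the rest of
    # tahoe.cfg, approximating what read_config/set_config achieve.
    from configparser import ConfigParser
    from os.path import join

    def set_option(node_dir: str, section: str, option: str, value: str) -> None:
        path = join(node_dir, "tahoe.cfg")
        cfg = ConfigParser()
        cfg.read(path)
        if not cfg.has_section(section):
            cfg.add_section(section)
        cfg.set(section, option, value)
        with open(path, "w") as f:
            cfg.write(f)

    # Hypothetical usage mirroring the fixture above:
    set_option("/tmp/introducer_i2p", "node", "web.port", "4563")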
- protocol = util._MagicTextProtocol('introducer running') + protocol = util._MagicTextProtocol('introducer running', "introducer") transport = util._tahoe_runner_optional_coverage( protocol, reactor, @@ -147,6 +130,7 @@ def i2p_introducer_furl(i2p_introducer, temp_dir): @pytest_twisted.inlineCallbacks +@pytest.mark.skip("I2P tests are not functioning at all, for unknown reasons") def test_i2p_service_storage(reactor, request, temp_dir, flog_gatherer, i2p_network, i2p_introducer_furl): yield _create_anonymous_node(reactor, 'carol_i2p', 8008, request, temp_dir, flog_gatherer, i2p_network, i2p_introducer_furl) yield _create_anonymous_node(reactor, 'dave_i2p', 8009, request, temp_dir, flog_gatherer, i2p_network, i2p_introducer_furl) @@ -170,7 +154,8 @@ def test_i2p_service_storage(reactor, request, temp_dir, flog_gatherer, i2p_netw sys.executable, '-b', '-m', 'allmydata.scripts.runner', '-d', join(temp_dir, 'carol_i2p'), 'put', gold_path, - ) + ), + env=environ, ) yield proto.done cap = proto.output.getvalue().strip().split()[-1] @@ -184,7 +169,8 @@ def test_i2p_service_storage(reactor, request, temp_dir, flog_gatherer, i2p_netw sys.executable, '-b', '-m', 'allmydata.scripts.runner', '-d', join(temp_dir, 'dave_i2p'), 'get', cap, - ) + ), + env=environ, ) yield proto.done @@ -211,7 +197,8 @@ def _create_anonymous_node(reactor, name, control_port, request, temp_dir, flog_ '--hide-ip', '--listen', 'i2p', node_dir.path, - ) + ), + env=environ, ) yield proto.done diff --git a/integration/test_servers_of_happiness.py b/integration/test_servers_of_happiness.py index 3adc11340..8f64696a8 100644 --- a/integration/test_servers_of_happiness.py +++ b/integration/test_servers_of_happiness.py @@ -1,17 +1,10 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import sys from os.path import join +from os import environ from . import util @@ -29,7 +22,7 @@ def test_upload_immutable(reactor, temp_dir, introducer_furl, flog_gatherer, sto happy=7, total=10, ) - util.await_client_ready(edna) + yield util.await_client_ready(edna) node_dir = join(temp_dir, 'edna') @@ -43,7 +36,8 @@ def test_upload_immutable(reactor, temp_dir, introducer_furl, flog_gatherer, sto sys.executable, '-b', '-m', 'allmydata.scripts.runner', '-d', node_dir, 'put', __file__, - ] + ], + env=environ, ) try: yield proto.done diff --git a/integration/test_tor.py b/integration/test_tor.py index 495679a51..32572276a 100644 --- a/integration/test_tor.py +++ b/integration/test_tor.py @@ -1,17 +1,10 @@ """ Ported to Python 3. 
""" -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import sys from os.path import join +from os import environ import pytest import pytest_twisted @@ -25,6 +18,8 @@ from twisted.python.filepath import ( from allmydata.test.common import ( write_introducer, ) +from allmydata.client import read_config +from allmydata.util.deferredutil import async_to_deferred # see "conftest.py" for the fixtures (e.g. "tor_network") @@ -35,18 +30,28 @@ from allmydata.test.common import ( if sys.platform.startswith('win'): pytest.skip('Skipping Tor tests on Windows', allow_module_level=True) -if PY2: - pytest.skip('Skipping Tor tests on Python 2 because dependencies are hard to come by', allow_module_level=True) - @pytest_twisted.inlineCallbacks def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl): - carol = yield _create_anonymous_node(reactor, 'carol', 8008, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) - dave = yield _create_anonymous_node(reactor, 'dave', 8009, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl) - util.await_client_ready(carol, minimum_number_of_servers=2) - util.await_client_ready(dave, minimum_number_of_servers=2) + """ + Two nodes and an introducer all configured to use Tahoe. + + The two nodes can talk to the introducer and each other: we upload to one + node, read from the other. + """ + carol = yield _create_anonymous_node(reactor, 'carol', 8008, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl, 2) + dave = yield _create_anonymous_node(reactor, 'dave', 8009, request, temp_dir, flog_gatherer, tor_network, tor_introducer_furl, 2) + yield util.await_client_ready(carol, minimum_number_of_servers=2, timeout=600) + yield util.await_client_ready(dave, minimum_number_of_servers=2, timeout=600) + yield upload_to_one_download_from_the_other(reactor, temp_dir, carol, dave) + + +@async_to_deferred +async def upload_to_one_download_from_the_other(reactor, temp_dir, upload_to: util.TahoeProcess, download_from: util.TahoeProcess): + """ + Ensure both nodes are connected to "a grid" by uploading something via one + node, and retrieve it using the other. + """ - # ensure both nodes are connected to "a grid" by uploading - # something via carol, and retrieve it using dave. 
gold_path = join(temp_dir, "gold") with open(gold_path, "w") as f: f.write( @@ -63,13 +68,14 @@ def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_ne sys.executable, ( sys.executable, '-b', '-m', 'allmydata.scripts.runner', - '-d', join(temp_dir, 'carol'), + '-d', upload_to.node_dir, 'put', gold_path, - ) + ), + env=environ, ) - yield proto.done + await proto.done cap = proto.output.getvalue().strip().split()[-1] - print("TEH CAP!", cap) + print("capability: {}".format(cap)) proto = util._CollectOutputProtocol(capture_stderr=False) reactor.spawnProcess( @@ -77,77 +83,83 @@ def test_onion_service_storage(reactor, request, temp_dir, flog_gatherer, tor_ne sys.executable, ( sys.executable, '-b', '-m', 'allmydata.scripts.runner', - '-d', join(temp_dir, 'dave'), + '-d', download_from.node_dir, 'get', cap, - ) + ), + env=environ, ) - yield proto.done - - dave_got = proto.output.getvalue().strip() - assert dave_got == open(gold_path, 'rb').read().strip() + await proto.done + download_got = proto.output.getvalue().strip() + assert download_got == open(gold_path, 'rb').read().strip() @pytest_twisted.inlineCallbacks -def _create_anonymous_node(reactor, name, control_port, request, temp_dir, flog_gatherer, tor_network, introducer_furl): +def _create_anonymous_node(reactor, name, control_port, request, temp_dir, flog_gatherer, tor_network, introducer_furl, shares_total: int) -> util.TahoeProcess: node_dir = FilePath(temp_dir).child(name) web_port = "tcp:{}:interface=localhost".format(control_port + 2000) if node_dir.exists(): raise RuntimeError( "A node already exists in '{}'".format(node_dir) ) - print("creating", node_dir) + print(f"creating {node_dir.path} with introducer {introducer_furl}") node_dir.makedirs() proto = util._DumpOutputProtocol(None) reactor.spawnProcess( proto, sys.executable, ( - sys.executable, '-m', 'allmydata.scripts.runner', + sys.executable, '-b', '-m', 'allmydata.scripts.runner', 'create-node', '--nickname', name, + '--webport', web_port, '--introducer', introducer_furl, '--hide-ip', '--tor-control-port', 'tcp:localhost:{}'.format(control_port), '--listen', 'tor', + '--shares-needed', '1', + '--shares-happy', '1', + '--shares-total', str(shares_total), node_dir.path, + ), + env=environ, ) - ) yield proto.done # Which services should this client connect to? 
write_introducer(node_dir, "default", introducer_furl) - with node_dir.child('tahoe.cfg').open('w') as f: - node_config = ''' -[node] -nickname = %(name)s -web.port = %(web_port)s -web.static = public_html -log_gatherer.furl = %(log_furl)s + util.basic_node_configuration(request, flog_gatherer, node_dir.path) -[tor] -control.port = tcp:localhost:%(control_port)d -onion.external_port = 3457 -onion.local_port = %(local_port)d -onion = true -onion.private_key_file = private/tor_onion.privkey - -[client] -shares.needed = 1 -shares.happy = 1 -shares.total = 2 - -''' % { - 'name': name, - 'web_port': web_port, - 'log_furl': flog_gatherer, - 'control_port': control_port, - 'local_port': control_port + 1000, -} - node_config = node_config.encode("utf-8") - f.write(node_config) + config = read_config(node_dir.path, "tub.port") + config.set_config("tor", "onion", "true") + config.set_config("tor", "onion.external_port", "3457") + config.set_config("tor", "control.port", f"tcp:port={control_port}:host=127.0.0.1") + config.set_config("tor", "onion.private_key_file", "private/tor_onion.privkey") print("running") result = yield util._run_node(reactor, node_dir.path, request, None) print("okay, launched") return result + +@pytest.mark.skipif(sys.platform.startswith('darwin'), reason='This test has issues on macOS') +@pytest_twisted.inlineCallbacks +def test_anonymous_client(reactor, request, temp_dir, flog_gatherer, tor_network, introducer_furl): + """ + A normal node (normie) and a normal introducer are configured, and one node + (anonymoose) which is configured to be anonymous by talking via Tor. + + Anonymoose should be able to communicate with normie. + + TODO how to ensure that anonymoose is actually using Tor? + """ + normie = yield util._create_node( + reactor, request, temp_dir, introducer_furl, flog_gatherer, "normie", + web_port="tcp:9989:interface=localhost", + storage=True, needed=1, happy=1, total=1, + ) + yield util.await_client_ready(normie) + + anonymoose = yield _create_anonymous_node(reactor, 'anonymoose', 8008, request, temp_dir, flog_gatherer, tor_network, introducer_furl, 1) + yield util.await_client_ready(anonymoose, minimum_number_of_servers=1, timeout=600) + + yield upload_to_one_download_from_the_other(reactor, temp_dir, normie, anonymoose) diff --git a/integration/test_web.py b/integration/test_web.py index 20cd61632..b863a27fe 100644 --- a/integration/test_web.py +++ b/integration/test_web.py @@ -14,17 +14,21 @@ from __future__ import annotations import time from urllib.parse import unquote as url_unquote, quote as url_quote +from twisted.internet.threads import deferToThread + import allmydata.uri from allmydata.util import jsonbytes as json from . 
import util +from .util import run_in_thread import requests import html5lib from bs4 import BeautifulSoup -from pytest_twisted import ensureDeferred +import pytest_twisted +@run_in_thread def test_index(alice): """ we can download the index file @@ -32,6 +36,7 @@ def test_index(alice): util.web_get(alice, u"") +@run_in_thread def test_index_json(alice): """ we can download the index file as json @@ -41,6 +46,7 @@ def test_index_json(alice): json.loads(data) +@run_in_thread def test_upload_download(alice): """ upload a file, then download it via readcap @@ -70,6 +76,7 @@ def test_upload_download(alice): assert str(data, "utf-8") == FILE_CONTENTS +@run_in_thread def test_put(alice): """ use PUT to create a file @@ -89,6 +96,7 @@ def test_put(alice): assert cap.needed_shares == int(cfg.get_config("client", "shares.needed")) +@run_in_thread def test_helper_status(storage_nodes): """ successfully GET the /helper_status page @@ -101,6 +109,7 @@ def test_helper_status(storage_nodes): assert str(dom.h1.string) == u"Helper Status" +@run_in_thread def test_deep_stats(alice): """ create a directory, do deep-stats on it and prove the /operations/ @@ -178,7 +187,7 @@ def test_deep_stats(alice): time.sleep(.5) -@util.run_in_thread +@run_in_thread def test_status(alice): """ confirm we get something sensible from /status and the various sub-types @@ -244,7 +253,7 @@ def test_status(alice): assert found_download, "Failed to find the file we downloaded in the status-page" -@ensureDeferred +@pytest_twisted.ensureDeferred async def test_directory_deep_check(reactor, request, alice): """ use deep-check and confirm the result pages work @@ -256,7 +265,10 @@ async def test_directory_deep_check(reactor, request, alice): total = 4 await util.reconfigure(reactor, request, alice, (happy, required, total), convergence=None) + await deferToThread(_test_directory_deep_check_blocking, alice) + +def _test_directory_deep_check_blocking(alice): # create a directory resp = requests.post( util.node_url(alice.node_dir, u"uri"), @@ -417,6 +429,7 @@ async def test_directory_deep_check(reactor, request, alice): assert dom is not None, "Operation never completed" +@run_in_thread def test_storage_info(storage_nodes): """ retrieve and confirm /storage URI for one storage node @@ -428,6 +441,7 @@ def test_storage_info(storage_nodes): ) +@run_in_thread def test_storage_info_json(storage_nodes): """ retrieve and confirm /storage?t=json URI for one storage node @@ -442,6 +456,7 @@ def test_storage_info_json(storage_nodes): assert data[u"stats"][u"storage_server.reserved_space"] == 1000000000 +@run_in_thread def test_introducer_info(introducer): """ retrieve and confirm /introducer URI for the introducer @@ -460,6 +475,7 @@ def test_introducer_info(introducer): assert "subscription_summary" in data +@run_in_thread def test_mkdir_with_children(alice): """ create a directory using ?t=mkdir-with-children diff --git a/integration/util.py b/integration/util.py index 389e010bb..6a3ec57f3 100644 --- a/integration/util.py +++ b/integration/util.py @@ -12,7 +12,7 @@ import sys import time import json from os import mkdir, environ -from os.path import exists, join +from os.path import exists, join, basename from io import StringIO, BytesIO from subprocess import check_output @@ -117,7 +117,6 @@ class _CollectOutputProtocol(ProcessProtocol): self.output.write(data) def errReceived(self, data): - print("ERR: {!r}".format(data)) if self.capture_stderr: self.output.write(data) @@ -153,8 +152,9 @@ class _MagicTextProtocol(ProcessProtocol): and then 
.callback()s on self.done and .errback's if the process exits """ - def __init__(self, magic_text): + def __init__(self, magic_text: str, name: str) -> None: self.magic_seen = Deferred() + self.name = f"{name}: " self.exited = Deferred() self._magic_text = magic_text self._output = StringIO() @@ -164,7 +164,8 @@ class _MagicTextProtocol(ProcessProtocol): def outReceived(self, data): data = str(data, sys.stdout.encoding) - sys.stdout.write(data) + for line in data.splitlines(): + sys.stdout.write(self.name + line + "\n") self._output.write(data) if not self.magic_seen.called and self._magic_text in self._output.getvalue(): print("Saw '{}' in the logs".format(self._magic_text)) @@ -172,7 +173,8 @@ def errReceived(self, data): data = str(data, sys.stderr.encoding) - sys.stdout.write(data) + for line in data.splitlines(): + sys.stdout.write(self.name + line + "\n") def _cleanup_process_async(transport: IProcessTransport, allow_missing: bool) -> None: @@ -320,7 +322,7 @@ def _run_node(reactor, node_dir, request, magic_text, finalize=True): """ if magic_text is None: magic_text = "client running" - protocol = _MagicTextProtocol(magic_text) + protocol = _MagicTextProtocol(magic_text, basename(node_dir)) # "tahoe run" is consistent across Linux/macOS/Windows, unlike the old # "start" command. @@ -349,6 +351,36 @@ def _run_node(reactor, node_dir, request, magic_text, finalize=True): return d +def basic_node_configuration(request, flog_gatherer, node_dir: str): + """ + Set up common configuration options for a node, given a ``pytest`` request + fixture. + """ + config_path = join(node_dir, 'tahoe.cfg') + config = get_config(config_path) + set_config( + config, + u'node', + u'log_gatherer.furl', + flog_gatherer, + ) + force_foolscap = request.config.getoption("force_foolscap") + assert force_foolscap in (True, False) + set_config( + config, + 'storage', + 'force_foolscap', + str(force_foolscap), + ) + set_config( + config, + 'client', + 'force_foolscap', + str(force_foolscap), + ) + write_config(FilePath(config_path), config) + + def _create_node(reactor, request, temp_dir, introducer_furl, flog_gatherer, name, web_port, storage=True, magic_text=None, @@ -389,29 +421,7 @@ def _create_node(reactor, request, temp_dir, introducer_furl, flog_gatherer, nam created_d = done_proto.done def created(_): - config_path = join(node_dir, 'tahoe.cfg') - config = get_config(config_path) - set_config( - config, - u'node', - u'log_gatherer.furl', - flog_gatherer.furl, - ) - force_foolscap = request.config.getoption("force_foolscap") - assert force_foolscap in (True, False) - set_config( - config, - 'storage', - 'force_foolscap', - str(force_foolscap), - ) - set_config( - config, - 'client', - 'force_foolscap', - str(force_foolscap), - ) - write_config(FilePath(config_path), config) + basic_node_configuration(request, flog_gatherer.furl, node_dir) created_d.addCallback(created) d = Deferred() @@ -468,6 +478,31 @@ class FileShouldVanishException(Exception): ) +def run_in_thread(f): + """Decorator for integration tests that runs code in a thread. + + Because we're using pytest_twisted, tests that rely on the reactor are + expected to return a Deferred and use async APIs so the reactor can run. + + In the case of the integration test suite, it launches nodes in the + background using Twisted APIs. The nodes' stdout and stderr are read via + Twisted code.
If the reactor doesn't run, reads don't happen, and + eventually the buffers fill up, and the nodes block when they try to flush + logs. + + We can switch to Twisted APIs (treq instead of requests etc.), but + sometimes it's easier or expedient to just have a blocking test. So this + decorator allows you to run the test in a thread, and the reactor can keep + running in the main thread. + + See https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3597 for tracking bug. + """ + @wraps(f) + def test(*args, **kwargs): + return deferToThread(lambda: f(*args, **kwargs)) + return test + + def await_file_contents(path, contents, timeout=15, error_if=None): """ wait up to `timeout` seconds for the file at `path` (any path-like @@ -593,6 +628,7 @@ def web_post(tahoe, uri_fragment, **kwargs): return resp.content +@run_in_thread def await_client_ready(tahoe, timeout=10, liveness=60*2, minimum_number_of_servers=1): """ Uses the status API to wait for a client-type node (in `tahoe`, a @@ -617,24 +653,25 @@ def await_client_ready(tahoe, timeout=10, liveness=60*2, minimum_number_of_serve print("waiting because '{}'".format(e)) time.sleep(1) continue + servers = js['servers'] - if len(js['servers']) < minimum_number_of_servers: - print(f"waiting because insufficient servers (expected at least {minimum_number_of_servers})") + if len(servers) < minimum_number_of_servers: + print(f"waiting because {len(servers)} servers is fewer than required ({minimum_number_of_servers})") time.sleep(1) continue + + print( + f"Now: {time.ctime()}\n" + f"Server last-received-data: {[time.ctime(s['last_received_data']) for s in servers]}" + ) + server_times = [ server['last_received_data'] - for server in js['servers'] + for server in servers ] - # if any times are null/None that server has never been - # contacted (so it's down still, probably) - if any(t is None for t in server_times): - print("waiting because at least one server not contacted") - time.sleep(1) - continue - - # check that all times are 'recent enough' - if any([time.time() - t > liveness for t in server_times]): + # check that all times are 'recent enough' (it's OK if _some_ servers + # are down, we just want to make sure a sufficient number are up) + if sum(1 for t in server_times if t is not None and time.time() - t <= liveness) < minimum_number_of_servers: print("waiting because at least one server too old") time.sleep(1) continue @@ -660,30 +697,6 @@ def generate_ssh_key(path): f.write(s.encode("ascii")) -def run_in_thread(f): - """Decorator for integration tests that runs code in a thread. - - Because we're using pytest_twisted, tests that rely on the reactor are - expected to return a Deferred and use async APIs so the reactor can run. - - In the case of the integration test suite, it launches nodes in the - background using Twisted APIs. The nodes stdout and stderr is read via - Twisted code. If the reactor doesn't run, reads don't happen, and - eventually the buffers fill up, and the nodes block when they try to flush - logs. - - We can switch to Twisted APIs (treq instead of requests etc.), but - sometimes it's easier or expedient to just have a blocking test. So this - decorator allows you to run the test in a thread, and the reactor can keep - running in the main thread. - - See https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3597 for tracking bug.
- """ - @wraps(f) - def test(*args, **kwargs): - return deferToThread(lambda: f(*args, **kwargs)) - return test - @frozen class CHK: """ @@ -830,16 +843,11 @@ async def reconfigure(reactor, request, node: TahoeProcess, ) if changed: - # TODO reconfigure() seems to have issues on Windows. If you need to - # use it there, delete this assert and try to figure out what's going - # on... - assert not sys.platform.startswith("win") - # restart the node print(f"Restarting {node.node_dir} for ZFEC reconfiguration") await node.restart_async(reactor, request) print("Restarted. Waiting for ready state.") - await_client_ready(node) + await await_client_ready(node) print("Ready.") else: print("Config unchanged, not restarting.") diff --git a/misc/checkers/check_load.py b/misc/checkers/check_load.py index 21576ea3a..01a9ed832 100644 --- a/misc/checkers/check_load.py +++ b/misc/checkers/check_load.py @@ -1,5 +1,3 @@ -from __future__ import print_function - """ this is a load-generating client program. It does all of its work through a given tahoe node (specified by URL), and performs random reads and writes @@ -33,20 +31,11 @@ a mean of 10kB and a max of 100MB, so filesize=min(int(1.0/random(.0002)),1e8) """ +from __future__ import annotations import os, sys, httplib, binascii import urllib, json, random, time, urlparse -try: - from typing import Dict -except ImportError: - pass - -# Python 2 compatibility -from future.utils import PY2 -if PY2: - from future.builtins import str # noqa: F401 - if sys.argv[1] == "--stats": statsfiles = sys.argv[2:] # gather stats every 10 seconds, do a moving-window average of the last @@ -54,9 +43,9 @@ if sys.argv[1] == "--stats": DELAY = 10 MAXSAMPLES = 6 totals = [] - last_stats = {} # type: Dict[str, float] + last_stats : dict[str, float] = {} while True: - stats = {} # type: Dict[str, float] + stats : dict[str, float] = {} for sf in statsfiles: for line in open(sf, "r").readlines(): name, str_value = line.split(":") diff --git a/misc/coding_tools/find-trailing-spaces.py b/misc/coding_tools/find-trailing-spaces.py deleted file mode 100644 index 19e7e3c28..000000000 --- a/misc/coding_tools/find-trailing-spaces.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function - -import os, sys - -from twisted.python import usage - -class Options(usage.Options): - optFlags = [ - ("recursive", "r", "Search for .py files recursively"), - ] - def parseArgs(self, *starting_points): - self.starting_points = starting_points - -found = [False] - -def check(fn): - f = open(fn, "r") - for i,line in enumerate(f.readlines()): - if line == "\n": - continue - if line[-1] == "\n": - line = line[:-1] - if line.rstrip() != line: - # the %s:%d:%d: lets emacs' compile-mode jump to those locations - print("%s:%d:%d: trailing whitespace" % (fn, i+1, len(line)+1)) - found[0] = True - f.close() - -o = Options() -o.parseOptions() -if o['recursive']: - for starting_point in o.starting_points: - for root, dirs, files in os.walk(starting_point): - for fn in [f for f in files if f.endswith(".py")]: - fn = os.path.join(root, fn) - check(fn) -else: - for fn in o.starting_points: - check(fn) -if found[0]: - sys.exit(1) -sys.exit(0) diff --git a/mypy.ini b/mypy.ini index e6e7d16ff..482fd6dd8 100644 --- a/mypy.ini +++ b/mypy.ini @@ -7,4 +7,18 @@ show_error_codes = True warn_unused_configs =True no_implicit_optional = True warn_redundant_casts = True -strict_equality = True \ No newline at end of file +strict_equality = True + 
+[mypy-allmydata.test.cli.wormholetesting,allmydata.test.test_connection_status] +disallow_any_generics = True +disallow_subclassing_any = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_incomplete_defs = True +check_untyped_defs = True +disallow_untyped_decorators = True +warn_unused_ignores = True +warn_return_any = True +no_implicit_reexport = True +strict_equality = True +strict_concatenate = True diff --git a/newsfragments/3622.minor b/newsfragments/3622.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3880.minor b/newsfragments/3880.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3910.minor b/newsfragments/3910.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3935.minor b/newsfragments/3935.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3970.minor b/newsfragments/3970.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3978.minor b/newsfragments/3978.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3988.minor b/newsfragments/3988.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3989.installation b/newsfragments/3989.installation new file mode 100644 index 000000000..a2155b65c --- /dev/null +++ b/newsfragments/3989.installation @@ -0,0 +1 @@ +tenacity is no longer a dependency. diff --git a/newsfragments/3991.minor b/newsfragments/3991.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3993.minor b/newsfragments/3993.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3994.minor b/newsfragments/3994.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3996.minor b/newsfragments/3996.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3997.installation b/newsfragments/3997.installation new file mode 100644 index 000000000..186be0fc2 --- /dev/null +++ b/newsfragments/3997.installation @@ -0,0 +1 @@ +Tahoe-LAFS is incompatible with cryptography >= 40 and now declares a requirement on an older version. diff --git a/newsfragments/3998.minor b/newsfragments/3998.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3999.bugfix b/newsfragments/3999.bugfix new file mode 100644 index 000000000..a8a8396f4 --- /dev/null +++ b/newsfragments/3999.bugfix @@ -0,0 +1 @@ +A bug where Introducer nodes configured to listen on Tor or I2P would not actually do so has been fixed. 
\ No newline at end of file diff --git a/newsfragments/4000.minor b/newsfragments/4000.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4001.minor b/newsfragments/4001.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4002.minor b/newsfragments/4002.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4003.minor b/newsfragments/4003.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4005.minor b/newsfragments/4005.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4006.minor b/newsfragments/4006.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4009.minor b/newsfragments/4009.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4010.minor b/newsfragments/4010.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4012.minor b/newsfragments/4012.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4014.minor b/newsfragments/4014.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4015.minor b/newsfragments/4015.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4016.minor b/newsfragments/4016.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4018.minor b/newsfragments/4018.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4019.minor b/newsfragments/4019.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4020.minor b/newsfragments/4020.minor new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/newsfragments/4020.minor @@ -0,0 +1 @@ + diff --git a/newsfragments/4022.minor b/newsfragments/4022.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4023.minor b/newsfragments/4023.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4024.minor b/newsfragments/4024.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4026.minor b/newsfragments/4026.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4027.minor b/newsfragments/4027.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4028.minor b/newsfragments/4028.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4029.bugfix b/newsfragments/4029.bugfix new file mode 100644 index 000000000..3ce4670ec --- /dev/null +++ b/newsfragments/4029.bugfix @@ -0,0 +1,2 @@ +The (still off-by-default) HTTP storage client will now use Tor when Tor-based client-side anonymity was requested. +Previously it would use normal TCP connections and not be anonymous. \ No newline at end of file diff --git a/newsfragments/4035.minor b/newsfragments/4035.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/4036.feature b/newsfragments/4036.feature new file mode 100644 index 000000000..36c062718 --- /dev/null +++ b/newsfragments/4036.feature @@ -0,0 +1 @@ +tahoe run now accepts --allow-stdin-close to mean "keep running if stdin closes" \ No newline at end of file diff --git a/nix/collections-extended.nix b/nix/collections-extended.nix new file mode 100644 index 000000000..05254fc1b --- /dev/null +++ b/nix/collections-extended.nix @@ -0,0 +1,12 @@ +# Package a version that's compatible with Python 3.11. 
This can go away once +# https://github.com/mlenzen/collections-extended/pull/199 is merged and +# included in a version of nixpkgs we depend on. +{ fetchFromGitHub, collections-extended }: +collections-extended.overrideAttrs (old: { + src = fetchFromGitHub { + owner = "mlenzen"; + repo = "collections-extended"; + rev = "8b93390636d58d28012b8e9d22334ee64ca37d73"; + hash = "sha256-e7RCpNsqyS1d3q0E+uaE4UOEQziueYsRkKEvy3gCHt0="; + }; +}) diff --git a/nix/klein.nix b/nix/klein.nix new file mode 100644 index 000000000..be4426465 --- /dev/null +++ b/nix/klein.nix @@ -0,0 +1,9 @@ +{ klein, fetchPypi }: +klein.overrideAttrs (old: rec { + pname = "klein"; + version = "23.5.0"; + src = fetchPypi { + inherit pname version; + sha256 = "sha256-kGkSt6tBDZp/NRICg5w81zoqwHe9AHHIYcMfDu92Aoc="; + }; +}) diff --git a/nix/pycddl.nix b/nix/pycddl.nix index 703f00595..4c68830d4 100644 --- a/nix/pycddl.nix +++ b/nix/pycddl.nix @@ -27,7 +27,7 @@ # # 8. run `nix-build`. it should succeed. if it does not, seek assistance. # -{ lib, fetchPypi, buildPythonPackage, rustPlatform }: +{ lib, fetchPypi, python, buildPythonPackage, rustPlatform }: buildPythonPackage rec { pname = "pycddl"; version = "0.4.0"; @@ -38,6 +38,12 @@ buildPythonPackage rec { sha256 = "sha256-w0CGbPeiXyS74HqZXyiXhvaAMUaIj5onwjl9gWKAjqY="; }; + # Without this, when building for PyPy, `maturin build` seems to fail to + # find the interpreter at all and then fails early in the build process with + # an error saying "unsupported Python interpreter". We can easily point + # directly at the relevant interpreter, so do that. + maturinBuildFlags = [ "--interpreter" python.executable ]; + nativeBuildInputs = with rustPlatform; [ maturinBuildHook cargoSetupHook diff --git a/nix/pyopenssl.nix b/nix/pyopenssl.nix new file mode 100644 index 000000000..b8966fad1 --- /dev/null +++ b/nix/pyopenssl.nix @@ -0,0 +1,10 @@ +{ pyopenssl, fetchPypi, isPyPy }: +pyopenssl.overrideAttrs (old: rec { + pname = "pyOpenSSL"; + version = "23.2.0"; + name = "${pname}-${version}"; + src = fetchPypi { + inherit pname version; + sha256 = "J2+TH1WkUufeppxxc+mE6ypEB85BPJGKo0tV+C+bi6w="; + }; +}) diff --git a/nix/python-overrides.nix b/nix/python-overrides.nix new file mode 100644 index 000000000..0ed415691 --- /dev/null +++ b/nix/python-overrides.nix @@ -0,0 +1,148 @@ +# Override various Python packages to create a package set that works for +# Tahoe-LAFS on CPython and PyPy. +self: super: +let + + # Run a function on a derivation if and only if we're building for PyPy. + onPyPy = f: drv: if super.isPyPy then f drv else drv; + + # Disable a Python package's test suite. + dontCheck = drv: drv.overrideAttrs (old: { doInstallCheck = false; }); + + # Disable building a Python package's documentation. + dontBuildDocs = alsoDisable: drv: (drv.override ({ + sphinxHook = null; + } // alsoDisable)).overrideAttrs ({ outputs, ... }: { + outputs = builtins.filter (x: "doc" != x) outputs; + }); + +in { + # Some dependencies aren't packaged in nixpkgs so supply our own packages. + pycddl = self.callPackage ./pycddl.nix { }; + txi2p = self.callPackage ./txi2p.nix { }; + + # Some packages are of somewhat too-old versions - update them. + klein = self.callPackage ./klein.nix { + # Avoid infinite recursion. + inherit (super) klein; + }; + txtorcon = self.callPackage ./txtorcon.nix { + inherit (super) txtorcon; + }; + + # Update the version of pyopenssl. 
+ pyopenssl = self.callPackage ./pyopenssl.nix { + pyopenssl = + # Building the docs requires sphinx which brings in a dependency on babel, + # the test suite of which fails. + onPyPy (dontBuildDocs { sphinx-rtd-theme = null; }) + # Avoid infinite recursion. + super.pyopenssl; + }; + + # collections-extended is currently broken for Python 3.11 in nixpkgs but + # we know where a working version lives. + collections-extended = self.callPackage ./collections-extended.nix { + inherit (super) collections-extended; + }; + + # greenlet is incompatible with PyPy but PyPy has a builtin equivalent. + # Fixed in nixpkgs in a5f8184fb816a4fd5ae87136838c9981e0d22c67. + greenlet = onPyPy (drv: null) super.greenlet; + + # tornado and tk pull in huge dependency trees for functionality we don't + # care about, also tkinter doesn't work on PyPy. + matplotlib = super.matplotlib.override { tornado = null; enableTk = false; }; + + tqdm = super.tqdm.override { + # ibid. + tkinter = null; + # pandas is only required by the part of the test suite covering + # integration with pandas that we don't care about. pandas is a huge + # dependency. + pandas = null; + }; + + # The treq test suite depends on httpbin. httpbin pulls in babel (flask -> + # jinja2 -> babel) and arrow (brotlipy -> construct -> arrow). babel fails + # its test suite and arrow segfaults. + treq = onPyPy dontCheck super.treq; + + # the six test suite fails on PyPy because it depends on dbm which the + # nixpkgs PyPy build appears to be missing. Maybe fixed in nixpkgs in + # a5f8184fb816a4fd5ae87136838c9981e0d22c67. + six = onPyPy dontCheck super.six; + + # Likewise for beautifulsoup4. + beautifulsoup4 = onPyPy (dontBuildDocs {}) super.beautifulsoup4; + + # The autobahn test suite pulls in a vast number of dependencies for + # functionality we don't care about. It might be nice to *selectively* + # disable just some of it but this is easier. + autobahn = onPyPy dontCheck super.autobahn; + + # and python-dotenv tests pull in a lot of dependencies, including jedi, + # which does not work on PyPy. + python-dotenv = onPyPy dontCheck super.python-dotenv; + + # Upstream package unaccountably includes a sqlalchemy dependency ... but + # the project has no such dependency. Fixed in nixpkgs in + # da10e809fff70fbe1d86303b133b779f09f56503. + aiocontextvars = super.aiocontextvars.override { sqlalchemy = null; }; + + # By default, the sphinx docs are built, which pulls in a lot of + # dependencies - including jedi, which does not work on PyPy. + hypothesis = + (let h = super.hypothesis; + in + if (h.override.__functionArgs.enableDocumentation or false) + then h.override { enableDocumentation = false; } + else h).overrideAttrs ({ nativeBuildInputs, ... }: { + # The nixpkgs expression is missing the tzdata check input. + nativeBuildInputs = nativeBuildInputs ++ [ super.tzdata ]; + }); + + # flaky's test suite depends on nose and nose appears to have Python 3 + # incompatibilities (it includes `print` statements, for example). + flaky = onPyPy dontCheck super.flaky; + + # Replace the deprecated way of running the test suite with the modern way. + # This also drops a bunch of unnecessary build-time dependencies, some of + # which are broken on PyPy. Fixed in nixpkgs in + # 5feb5054bb08ba779bd2560a44cf7d18ddf37fea. + zfec = (super.zfec.override { + setuptoolsTrial = null; + }).overrideAttrs (old: { + checkPhase = "trial zfec"; + }); + + # collections-extended is packaged with poetry-core.
poetry-core test suite # uses virtualenv and virtualenv test suite fails on PyPy. + poetry-core = onPyPy dontCheck super.poetry-core; + + # The test suite fails with some rather irrelevant (to us) string comparison + # failure on PyPy. Probably a PyPy bug but doesn't seem like we should + # care. + rich = onPyPy dontCheck super.rich; + + # The pyutil test suite fails in some ... test ... for some deprecation + # functionality we don't care about. + pyutil = onPyPy dontCheck super.pyutil; + + # testCall1 fails fairly inscrutably on PyPy. Perhaps someone can fix that, + # or we could at least just skip that one test. Probably better to fix it + # since we actually depend directly and significantly on Foolscap. + foolscap = onPyPy dontCheck super.foolscap; + + # Fixed by nixpkgs PR https://github.com/NixOS/nixpkgs/pull/222246 + psutil = super.psutil.overrideAttrs ({ pytestFlagsArray, disabledTests, ...}: { + # Upstream already disables some tests but there are even more that have + # build impurities that come from build system hardware configuration. + # Skip them too. + pytestFlagsArray = [ "-v" ] ++ pytestFlagsArray; + disabledTests = disabledTests ++ [ "sensors_temperatures" ]; + }); + + # CircleCI build systems don't have enough memory to run this test suite. + lz4 = dontCheck super.lz4; +} diff --git a/nix/tahoe-lafs.nix b/nix/tahoe-lafs.nix index 380260c70..bf3ea83d3 100644 --- a/nix/tahoe-lafs.nix +++ b/nix/tahoe-lafs.nix @@ -34,6 +34,7 @@ let magic-wormhole netifaces psutil + pyyaml pycddl pyrsistent pyutil @@ -48,20 +49,15 @@ let zope_interface ] ++ pickExtraDependencies pythonExtraDependencies extrasNames; - pythonCheckDependencies = with pythonPackages; [ + unitTestDependencies = with pythonPackages; [ beautifulsoup4 fixtures hypothesis mock - paramiko prometheus-client - pytest - pytest-timeout - pytest-twisted - tenacity testtools - towncrier ]; + in buildPythonPackage { inherit pname version; @@ -69,7 +65,7 @@ buildPythonPackage { propagatedBuildInputs = pythonPackageDependencies; inherit doCheck; - checkInputs = pythonCheckDependencies; + checkInputs = unitTestDependencies; checkPhase = '' export TAHOE_LAFS_HYPOTHESIS_PROFILE=ci python -m twisted.trial -j $NIX_BUILD_CORES allmydata diff --git a/nix/txtorcon.nix b/nix/txtorcon.nix new file mode 100644 index 000000000..552c03fd0 --- /dev/null +++ b/nix/txtorcon.nix @@ -0,0 +1,9 @@ +{ txtorcon, fetchPypi }: +txtorcon.overrideAttrs (old: rec { + pname = "txtorcon"; + version = "23.5.0"; + src = fetchPypi { + inherit pname version; + hash = "sha256-k/2Aqd1QX2mNCGT+k9uLapwRRLX+uRUwggtw7YmCZRw="; + }; +}) diff --git a/setup.cfg b/setup.cfg index f4539279e..9415b3ab4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,6 +6,9 @@ develop = update_version develop bdist_egg = update_version bdist_egg bdist_wheel = update_version bdist_wheel +# This has been replaced by ruff (see .ruff.toml), which has the same checks as +# flake8 plus many more, and is also faster. However, we're keeping this config +# in case people still use flake8 in IDEs, etc. [flake8] # Enforce all pyflakes constraints, and also prohibit tabs for indentation. # Reference: diff --git a/setup.py b/setup.py index 82ff45764..7ca2650d5 100644 --- a/setup.py +++ b/setup.py @@ -65,6 +65,9 @@ install_requires = [ # version of cryptography will *really* be installed.
"cryptography >= 2.6", + # * Used for custom HTTPS validation + "pyOpenSSL >= 23.2.0", + # * The SFTP frontend depends on Twisted 11.0.0 to fix the SSH server # rekeying bug # * The SFTP frontend and manhole depend on the conch extra. However, we @@ -136,7 +139,8 @@ install_requires = [ "collections-extended >= 2.0.2", # HTTP server and client - "klein", + # Latest version is necessary to work with latest werkzeug: + "klein >= 23.5.0", # 2.2.0 has a bug: https://github.com/pallets/werkzeug/issues/2465 "werkzeug != 2.2.0", "treq", @@ -159,10 +163,9 @@ setup_requires = [ ] tor_requires = [ - # This is exactly what `foolscap[tor]` means but pip resolves the pair of - # dependencies "foolscap[i2p] foolscap[tor]" to "foolscap[i2p]" so we lose - # this if we don't declare it ourselves! - "txtorcon >= 0.17.0", + # 23.5 added support for custom TLS contexts in web_agent(), which is + # needed for the HTTP storage client to run over Tor. + "txtorcon >= 23.5.0", ] i2p_requires = [ @@ -394,16 +397,31 @@ setup(name="tahoe-lafs", # also set in __init__.py "dulwich", "gpg", ], + + # Here are the dependencies required to set up a reproducible test + # environment. This could be for CI or local development. These + # are *not* library dependencies of the test suite itself. They are + # the tools we use to run the test suite at all. + "testenv": [ + # Pin all of these versions for the same reason you ever want to + # pin anything: to prevent new releases with regressions from + # introducing spurious failures into CI runs for whatever + # development work is happening at the time. The versions + # selected here are just the current versions at the time. + # Bumping them to keep up with future releases is fine as long + # as those releases are known to actually work. + "pip==22.0.3", + "wheel==0.37.1", + "setuptools==60.9.1", + "subunitreporter==22.2.0", + "python-subunit==1.4.2", + "junitxml==0.7", + "coverage==7.2.5", + ], + + # Here are the library dependencies of the test suite. "test": [ - "flake8", - # Pin a specific pyflakes so we don't have different folks - # disagreeing on what is or is not a lint issue. We can bump - # this version from time to time, but we will do it - # intentionally. - "pyflakes == 2.2.0", - "coverage ~= 5.0", "mock", - "tox ~= 3.0", "pytest", "pytest-twisted", "hypothesis >= 3.6.1", @@ -412,8 +430,6 @@ setup(name="tahoe-lafs", # also set in __init__.py "fixtures", "beautifulsoup4", "html5lib", - "junitxml", - "tenacity", # Pin old version until # https://github.com/paramiko/paramiko/issues/1961 is fixed. 
"paramiko < 2.9", diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 2adf59660..e85ed4fe2 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -7,10 +7,9 @@ import os import stat import time import weakref -from typing import Optional +from typing import Optional, Iterable from base64 import urlsafe_b64encode from functools import partial -# On Python 2 this will be the backported package: from configparser import NoSectionError from foolscap.furl import ( @@ -47,7 +46,7 @@ from allmydata.util.encodingutil import get_filesystem_encoding from allmydata.util.abbreviate import parse_abbreviated_size from allmydata.util.time_format import parse_duration, parse_date from allmydata.util.i2p_provider import create as create_i2p_provider -from allmydata.util.tor_provider import create as create_tor_provider +from allmydata.util.tor_provider import create as create_tor_provider, _Provider as TorProvider from allmydata.stats import StatsProvider from allmydata.history import History from allmydata.interfaces import ( @@ -175,8 +174,6 @@ class KeyGenerator(object): """I return a Deferred that fires with a (verifyingkey, signingkey) pair. The returned key will be 2048 bit""" keysize = 2048 - # RSA key generation for a 2048 bit key takes between 0.8 and 3.2 - # secs signer, verifier = rsa.create_signing_keypair(keysize) return defer.succeed( (verifier, signer) ) @@ -191,7 +188,7 @@ class Terminator(service.Service): return service.Service.stopService(self) -def read_config(basedir, portnumfile, generated_files=[]): +def read_config(basedir, portnumfile, generated_files: Iterable=()): """ Read and validate configuration for a client-style Node. See :method:`allmydata.node.read_config` for parameter meanings (the @@ -270,7 +267,7 @@ def create_client_from_config(config, _client_factory=None, _introducer_factory= introducer_clients = create_introducer_clients(config, main_tub, _introducer_factory) storage_broker = create_storage_farm_broker( config, default_connection_handlers, foolscap_connection_handlers, - tub_options, introducer_clients + tub_options, introducer_clients, tor_provider ) client = _client_factory( @@ -466,7 +463,7 @@ def create_introducer_clients(config, main_tub, _introducer_factory=None): return introducer_clients -def create_storage_farm_broker(config: _Config, default_connection_handlers, foolscap_connection_handlers, tub_options, introducer_clients): +def create_storage_farm_broker(config: _Config, default_connection_handlers, foolscap_connection_handlers, tub_options, introducer_clients, tor_provider: Optional[TorProvider]): """ Create a StorageFarmBroker object, for use by Uploader/Downloader (and everybody else who wants to use storage servers) @@ -502,6 +499,8 @@ def create_storage_farm_broker(config: _Config, default_connection_handlers, foo tub_maker=tub_creator, node_config=config, storage_client_config=storage_client_config, + default_connection_handlers=default_connection_handlers, + tor_provider=tor_provider, ) for ic in introducer_clients: sb.use_introducer(ic) @@ -1105,7 +1104,7 @@ class _Client(node.Node, pollmixin.PollMixin): # may get an opaque node if there were any problems. 
return self.nodemaker.create_from_cap(write_uri, read_uri, deep_immutable=deep_immutable, name=name) - def create_dirnode(self, initial_children={}, version=None): + def create_dirnode(self, initial_children=None, version=None): d = self.nodemaker.create_new_mutable_directory(initial_children, version=version) return d diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py index fdf373b45..ccd045b05 100644 --- a/src/allmydata/dirnode.py +++ b/src/allmydata/dirnode.py @@ -678,8 +678,10 @@ class DirectoryNode(object): return d # XXX: Too many arguments? Worthwhile to break into mutable/immutable? - def create_subdirectory(self, namex, initial_children={}, overwrite=True, + def create_subdirectory(self, namex, initial_children=None, overwrite=True, mutable=True, mutable_version=None, metadata=None): + if initial_children is None: + initial_children = {} name = normalize(namex) if self.is_readonly(): return defer.fail(NotWriteableError()) diff --git a/src/allmydata/frontends/sftpd.py b/src/allmydata/frontends/sftpd.py index d2d614c77..7ef9a8820 100644 --- a/src/allmydata/frontends/sftpd.py +++ b/src/allmydata/frontends/sftpd.py @@ -1925,7 +1925,11 @@ class FakeTransport(object): def loseConnection(self): logmsg("FakeTransport.loseConnection()", level=NOISY) - # getPeer and getHost can just raise errors, since we don't know what to return + def getHost(self): + raise NotImplementedError() + + def getPeer(self): + raise NotImplementedError() @implementer(ISession) @@ -1990,15 +1994,18 @@ class Dispatcher(object): def __init__(self, client): self._client = client - def requestAvatar(self, avatarID, mind, interface): + def requestAvatar(self, avatarId, mind, *interfaces): + [interface] = interfaces _assert(interface == IConchUser, interface=interface) - rootnode = self._client.create_node_from_uri(avatarID.rootcap) - handler = SFTPUserHandler(self._client, rootnode, avatarID.username) + rootnode = self._client.create_node_from_uri(avatarId.rootcap) + handler = SFTPUserHandler(self._client, rootnode, avatarId.username) return (interface, handler, handler.logout) class SFTPServer(service.MultiService): - name = "frontend:sftp" + # The type in Twisted for services is wrong in 22.10... + # https://github.com/twisted/twisted/issues/10135 + name = "frontend:sftp" # type: ignore[assignment] def __init__(self, client, accountfile, sftp_portstr, pubkey_file, privkey_file): diff --git a/src/allmydata/hashtree.py b/src/allmydata/hashtree.py index 17467459b..57bdbd9a1 100644 --- a/src/allmydata/hashtree.py +++ b/src/allmydata/hashtree.py @@ -332,7 +332,7 @@ class IncompleteHashTree(CompleteBinaryTreeMixin, list): name += " (leaf [%d] of %d)" % (leafnum, numleaves) return name - def set_hashes(self, hashes={}, leaves={}): + def set_hashes(self, hashes=None, leaves=None): """Add a bunch of hashes to the tree. I will validate these to the best of my ability. If I already have a @@ -382,7 +382,10 @@ class IncompleteHashTree(CompleteBinaryTreeMixin, list): corrupted or one of the received hashes was corrupted. If it raises NotEnoughHashesError, then the otherhashes dictionary was incomplete. """ - + if hashes is None: + hashes = {} + if leaves is None: + leaves = {} assert isinstance(hashes, dict) for h in hashes.values(): assert isinstance(h, bytes) diff --git a/src/allmydata/immutable/upload.py b/src/allmydata/immutable/upload.py index 1d70759ff..36bd86fa6 100644 --- a/src/allmydata/immutable/upload.py +++ b/src/allmydata/immutable/upload.py @@ -2,22 +2,12 @@ Ported to Python 3. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import annotations -from future.utils import PY2, native_str -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from future.utils import native_str from past.builtins import long, unicode from six import ensure_str -try: - from typing import List -except ImportError: - pass - import os, time, weakref, itertools import attr @@ -915,12 +905,12 @@ class _Accum(object): :ivar remaining: The number of bytes still expected. :ivar ciphertext: The bytes accumulated so far. """ - remaining = attr.ib(validator=attr.validators.instance_of(int)) # type: int - ciphertext = attr.ib(default=attr.Factory(list)) # type: List[bytes] + remaining : int = attr.ib(validator=attr.validators.instance_of(int)) + ciphertext : list[bytes] = attr.ib(default=attr.Factory(list)) def extend(self, size, # type: int - ciphertext, # type: List[bytes] + ciphertext, # type: list[bytes] ): """ Accumulate some more ciphertext. @@ -1401,7 +1391,9 @@ class CHKUploader(object): def get_upload_status(self): return self._upload_status -def read_this_many_bytes(uploadable, size, prepend_data=[]): +def read_this_many_bytes(uploadable, size, prepend_data=None): + if prepend_data is None: + prepend_data = [] if size == 0: return defer.succeed([]) d = uploadable.read(size) @@ -1851,7 +1843,9 @@ class Uploader(service.MultiService, log.PrefixingLogMixin): """I am a service that allows file uploading. I am a service-child of the Client. """ - name = "uploader" + # The type in Twisted for services is wrong in 22.10... + # https://github.com/twisted/twisted/issues/10135 + name = "uploader" # type: ignore[assignment] URI_LIT_SIZE_THRESHOLD = 55 def __init__(self, helper_furl=None, stats_provider=None, history=None): diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 467d0d450..0f00c5417 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -17,11 +17,13 @@ if PY2: from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, pow, round, super, range, max, min # noqa: F401 from past.builtins import long +from typing import Dict from zope.interface import Interface, Attribute from twisted.plugin import ( IPlugin, ) +from twisted.internet.defer import Deferred from foolscap.api import StringConstraint, ListOf, TupleOf, SetOf, DictOf, \ ChoiceOf, IntegerConstraint, Any, RemoteInterface, Referenceable @@ -307,12 +309,15 @@ class RIStorageServer(RemoteInterface): store that on disk. """ +# The result of IStorageServer.get_version(): +VersionMessage = Dict[bytes, object] + class IStorageServer(Interface): """ An object capable of storing shares for a storage client. """ - def get_version(): + def get_version() -> Deferred[VersionMessage]: """ :see: ``RIStorageServer.get_version`` """ @@ -493,47 +498,6 @@ class IStorageBroker(Interface): @return: unicode nickname, or None """ - # methods moved from IntroducerClient, need review - def get_all_connections(): - """Return a frozenset of (nodeid, service_name, rref) tuples, one for - each active connection we've established to a remote service. 
This is - mostly useful for unit tests that need to wait until a certain number - of connections have been made.""" - - def get_all_connectors(): - """Return a dict that maps from (nodeid, service_name) to a - RemoteServiceConnector instance for all services that we are actively - trying to connect to. Each RemoteServiceConnector has the following - public attributes:: - - service_name: the type of service provided, like 'storage' - last_connect_time: when we last established a connection - last_loss_time: when we last lost a connection - - version: the peer's version, from the most recent connection - oldest_supported: the peer's oldest supported version, same - - rref: the RemoteReference, if connected, otherwise None - - This method is intended for monitoring interfaces, such as a web page - that describes connecting and connected peers. - """ - - def get_all_peerids(): - """Return a frozenset of all peerids to whom we have a connection (to - one or more services) established. Mostly useful for unit tests.""" - - def get_all_connections_for(service_name): - """Return a frozenset of (nodeid, service_name, rref) tuples, one - for each active connection that provides the given SERVICE_NAME.""" - - def get_permuted_peers(service_name, key): - """Returns an ordered list of (peerid, rref) tuples, selecting from - the connections that provide SERVICE_NAME, using a hash-based - permutation keyed by KEY. This randomizes the service list in a - repeatable way, to distribute load over many peers. - """ - class IDisplayableServer(Interface): def get_nickname(): @@ -551,16 +515,6 @@ class IServer(IDisplayableServer): def start_connecting(trigger_cb): pass - def get_rref(): - """Obsolete. Use ``get_storage_server`` instead. - - Once a server is connected, I return a RemoteReference. - Before a server is connected for the first time, I return None. - - Note that the rref I return will start producing DeadReferenceErrors - once the connection is lost. - """ - def upload_permitted(): """ :return: True if we should use this server for uploads, False @@ -1447,7 +1401,7 @@ class IDirectoryNode(IFilesystemNode): is a file, or if must_be_file is True and the child is a directory, I raise ChildOfWrongTypeError.""" - def create_subdirectory(name, initial_children={}, overwrite=True, + def create_subdirectory(name, initial_children=None, overwrite=True, mutable=True, mutable_version=None, metadata=None): """I create and attach a directory at the given name. The new directory can be empty, or it can be populated with children @@ -2586,7 +2540,7 @@ class IClient(Interface): @return: a Deferred that fires with an IMutableFileNode instance. """ - def create_dirnode(initial_children={}): + def create_dirnode(initial_children=None): """Create a new unattached dirnode, possibly with initial children. @param initial_children: dict with keys that are unicode child names, @@ -2641,7 +2595,7 @@ class INodeMaker(Interface): for use by unit tests, to create mutable files that are smaller than usual.""" - def create_new_mutable_directory(initial_children={}): + def create_new_mutable_directory(initial_children=None): """I create a new mutable directory, and return a Deferred that will fire with the IDirectoryNode instance when it is ready. 
If initial_children= is provided (a dict mapping unicode child name to diff --git a/src/allmydata/introducer/client.py b/src/allmydata/introducer/client.py index 07f8a5f7a..a64596f0e 100644 --- a/src/allmydata/introducer/client.py +++ b/src/allmydata/introducer/client.py @@ -35,7 +35,7 @@ class InvalidCacheError(Exception): V2 = b"http://allmydata.org/tahoe/protocols/introducer/v2" -@implementer(RIIntroducerSubscriberClient_v2, IIntroducerClient) +@implementer(RIIntroducerSubscriberClient_v2, IIntroducerClient) # type: ignore[misc] class IntroducerClient(service.Service, Referenceable): def __init__(self, tub, introducer_furl, diff --git a/src/allmydata/introducer/server.py b/src/allmydata/introducer/server.py index f0638439a..157a1b73c 100644 --- a/src/allmydata/introducer/server.py +++ b/src/allmydata/introducer/server.py @@ -2,24 +2,13 @@ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import annotations - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from past.builtins import long from six import ensure_text import time, os.path, textwrap - -try: - from typing import Any, Dict, Union -except ImportError: - pass +from typing import Any, Union from zope.interface import implementer from twisted.application import service @@ -79,10 +68,6 @@ def create_introducer(basedir=u"."): default_connection_handlers, foolscap_connection_handlers = create_connection_handlers(config, i2p_provider, tor_provider) tub_options = create_tub_options(config) - # we don't remember these because the Introducer doesn't make - # outbound connections. - i2p_provider = None - tor_provider = None main_tub = create_main_tub( config, tub_options, default_connection_handlers, foolscap_connection_handlers, i2p_provider, tor_provider, @@ -94,6 +79,8 @@ def create_introducer(basedir=u"."): i2p_provider, tor_provider, ) + i2p_provider.setServiceParent(node) + tor_provider.setServiceParent(node) return defer.succeed(node) except Exception: return Failure() @@ -155,17 +142,20 @@ def stringify_remote_address(rref): return str(remote) +# MyPy doesn't work well with remote interfaces... @implementer(RIIntroducerPublisherAndSubscriberService_v2) -class IntroducerService(service.MultiService, Referenceable): - name = "introducer" +class IntroducerService(service.MultiService, Referenceable): # type: ignore[misc] + # The type in Twisted for services is wrong in 22.10... + # https://github.com/twisted/twisted/issues/10135 + name = "introducer" # type: ignore[assignment] # v1 is the original protocol, added in 1.0 (but only advertised starting # in 1.3), removed in 1.12. v2 is the new signed protocol, added in 1.10 # TODO: reconcile bytes/str for keys - VERSION = { + VERSION : dict[Union[bytes, str], Any]= { #"http://allmydata.org/tahoe/protocols/introducer/v1": { }, b"http://allmydata.org/tahoe/protocols/introducer/v2": { }, b"application-version": allmydata.__full_version__.encode("utf-8"), - } # type: Dict[Union[bytes, str], Any] + } def __init__(self): service.MultiService.__init__(self) diff --git a/src/allmydata/node.py b/src/allmydata/node.py index 34abb307f..6c3082b50 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -4,14 +4,8 @@ a node for Tahoe-LAFS. Ported to Python 3. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import annotations -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from six import ensure_str, ensure_text import json @@ -23,11 +17,7 @@ import errno from base64 import b32decode, b32encode from errno import ENOENT, EPERM from warnings import warn - -try: - from typing import Union -except ImportError: - pass +from typing import Union, Iterable import attr @@ -182,7 +172,7 @@ def create_node_dir(basedir, readme_text): f.write(readme_text) -def read_config(basedir, portnumfile, generated_files=[], _valid_config=None): +def read_config(basedir, portnumfile, generated_files: Iterable = (), _valid_config=None): """ Read and validate configuration. @@ -281,8 +271,7 @@ def _error_about_old_config_files(basedir, generated_files): raise e -def ensure_text_and_abspath_expanduser_unicode(basedir): - # type: (Union[bytes, str]) -> str +def ensure_text_and_abspath_expanduser_unicode(basedir: Union[bytes, str]) -> str: return abspath_expanduser_unicode(ensure_text(basedir)) @@ -752,7 +741,7 @@ def create_connection_handlers(config, i2p_provider, tor_provider): def create_tub(tub_options, default_connection_handlers, foolscap_connection_handlers, - handler_overrides={}, force_foolscap=False, **kwargs): + handler_overrides=None, force_foolscap=False, **kwargs): """ Create a Tub with the right options and handlers. It will be ephemeral unless the caller provides certFile= in kwargs @@ -766,6 +755,8 @@ def create_tub(tub_options, default_connection_handlers, foolscap_connection_han :param bool force_foolscap: If True, only allow Foolscap, not just HTTPS storage protocol. """ + if handler_overrides is None: + handler_overrides = {} # We listen simultaneously for both Foolscap and HTTPS on the same port, # so we have to create a special Foolscap Tub for that to work: if force_foolscap: @@ -933,7 +924,7 @@ def tub_listen_on(i2p_provider, tor_provider, tub, tubport, location): def create_main_tub(config, tub_options, default_connection_handlers, foolscap_connection_handlers, i2p_provider, tor_provider, - handler_overrides={}, cert_filename="node.pem"): + handler_overrides=None, cert_filename="node.pem"): """ Creates a 'main' Foolscap Tub, typically for use as the top-level access point for a running Node. @@ -954,6 +945,8 @@ def create_main_tub(config, tub_options, :param tor_provider: None, or a _Provider instance if txtorcon + Tor are installed. """ + if handler_overrides is None: + handler_overrides = {} portlocation = _tub_portlocation( config, iputil.get_local_addresses_sync, diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py index 1b7ea5f45..39663bda9 100644 --- a/src/allmydata/nodemaker.py +++ b/src/allmydata/nodemaker.py @@ -135,8 +135,9 @@ class NodeMaker(object): d.addCallback(lambda res: n) return d - def create_new_mutable_directory(self, initial_children={}, version=None): - # initial_children must have metadata (i.e. 
{} instead of None) + def create_new_mutable_directory(self, initial_children=None, version=None): + if initial_children is None: + initial_children = {} for (name, (node, metadata)) in initial_children.items(): precondition(isinstance(metadata, dict), "create_new_mutable_directory requires metadata to be a dict, not None", metadata) diff --git a/src/allmydata/protocol_switch.py b/src/allmydata/protocol_switch.py index 208efec6c..6a6bf8061 100644 --- a/src/allmydata/protocol_switch.py +++ b/src/allmydata/protocol_switch.py @@ -16,9 +16,10 @@ later in the configuration process. from __future__ import annotations from itertools import chain +from typing import cast from twisted.internet.protocol import Protocol -from twisted.internet.interfaces import IDelayedCall +from twisted.internet.interfaces import IDelayedCall, IReactorFromThreads from twisted.internet.ssl import CertificateOptions from twisted.web.server import Site from twisted.protocols.tls import TLSMemoryBIOFactory @@ -89,7 +90,7 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): certificate=cls.tub.myCertificate.original, ) - http_storage_server = HTTPServer(reactor, storage_server, swissnum) + http_storage_server = HTTPServer(cast(IReactorFromThreads, reactor), storage_server, swissnum) cls.https_factory = TLSMemoryBIOFactory( certificate_options, False, @@ -102,8 +103,15 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): for location_hint in chain.from_iterable( hints.split(",") for hints in cls.tub.locationHints ): - if location_hint.startswith("tcp:"): - _, hostname, port = location_hint.split(":") + if location_hint.startswith("tcp:") or location_hint.startswith("tor:"): + scheme, hostname, port = location_hint.split(":") + if scheme == "tcp": + subscheme = None + else: + subscheme = "tor" + # If we're listening on Tor, the hostname needs to have an + # .onion TLD. + assert hostname.endswith(".onion") port = int(port) storage_nurls.add( build_nurl( @@ -111,9 +119,10 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation): port, str(swissnum, "ascii"), cls.tub.myCertificate.original.to_cryptography(), + subscheme ) ) - # TODO this is probably where we'll have to support Tor and I2P? + # TODO this is where we'll have to support Tor and I2P as well. # See https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3888#comment:9 # for discussion (there will be separate tickets added for those at # some point.) 
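(Reviewer note: the protocol_switch.py hunk above maps Foolscap location hints onto NURL subschemes. As a sanity check, here is a minimal standalone sketch of that mapping; ``parse_location_hint`` is a hypothetical helper, not part of this patch, and simply mirrors the tcp:/tor: branch above.)

    from typing import Optional, Tuple

    def parse_location_hint(hint: str) -> Optional[Tuple[Optional[str], str, int]]:
        """Return (subscheme, hostname, port) for hints the switch understands."""
        if not (hint.startswith("tcp:") or hint.startswith("tor:")):
            return None  # e.g. i2p: hints are not handled yet (see ticket 3888)
        scheme, hostname, port = hint.split(":")
        # "tcp" becomes a plain HTTPS NURL; "tor" is recorded as a subscheme.
        subscheme = None if scheme == "tcp" else "tor"
        if subscheme == "tor":
            # Tor listeners advertise .onion addresses.
            assert hostname.endswith(".onion")
        return subscheme, hostname, int(port)

    assert parse_location_hint("tcp:example.com:3457") == (None, "example.com", 3457)
    assert parse_location_hint("tor:aaaa.onion:443") == ("tor", "aaaa.onion", 443)
    assert parse_location_hint("i2p:example.i2p:80") is None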
diff --git a/src/allmydata/scripts/admin.py b/src/allmydata/scripts/admin.py index 579505399..3acd52267 100644 --- a/src/allmydata/scripts/admin.py +++ b/src/allmydata/scripts/admin.py @@ -112,6 +112,9 @@ class AddGridManagerCertOptions(BaseOptions): return "Usage: tahoe [global-options] admin add-grid-manager-cert [options]" def postOptions(self) -> None: + assert self.parent is not None + assert self.parent.parent is not None + if self['name'] is None: raise usage.UsageError( "Must provide --name option" @@ -123,8 +126,8 @@ class AddGridManagerCertOptions(BaseOptions): data: str if self['filename'] == '-': - print("reading certificate from stdin", file=self.parent.parent.stderr) - data = self.parent.parent.stdin.read() + print("reading certificate from stdin", file=self.parent.parent.stderr) # type: ignore[attr-defined] + data = self.parent.parent.stdin.read() # type: ignore[attr-defined] if len(data) == 0: raise usage.UsageError( "Reading certificate from stdin failed" @@ -255,9 +258,9 @@ def do_admin(options): return f(so) -subCommands = [ +subCommands : SubCommands = [ ("admin", None, AdminCommand, "admin subcommands: use 'tahoe admin' for a list"), - ] # type: SubCommands + ] dispatch = { "admin": do_admin, diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 579b37906..6e1f28d11 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,22 +1,10 @@ """ Ported to Python 3. """ -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - import os.path, re, fnmatch -try: - from allmydata.scripts.types_ import SubCommands, Parameters -except ImportError: - pass +from allmydata.scripts.types_ import SubCommands, Parameters from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ @@ -29,14 +17,14 @@ NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") _default_nodedir = get_default_nodedir() class FileStoreOptions(BaseOptions): - optParameters = [ + optParameters : Parameters = [ ["node-url", "u", None, "Specify the URL of the Tahoe gateway node, such as " "'http://127.0.0.1:3456'. " "This overrides the URL found in the --node-directory ."], ["dir-cap", None, None, "Specify which dirnode URI should be used as the 'tahoe' alias."] - ] # type: Parameters + ] def postOptions(self): self["quiet"] = self.parent["quiet"] @@ -484,7 +472,7 @@ class DeepCheckOptions(FileStoreOptions): (which must be a directory), like 'tahoe check' but for multiple files. 
Optionally repair any problems found.""" -subCommands = [ +subCommands : SubCommands = [ ("mkdir", None, MakeDirectoryOptions, "Create a new directory."), ("add-alias", None, AddAliasOptions, "Add a new alias cap."), ("create-alias", None, CreateAliasOptions, "Create a new alias cap."), @@ -503,7 +491,7 @@ subCommands = [ ("check", None, CheckOptions, "Check a single file or directory."), ("deep-check", None, DeepCheckOptions, "Check all files/directories reachable from a starting point."), ("status", None, TahoeStatusCommand, "Various status information."), - ] # type: SubCommands + ] def mkdir(options): from allmydata.scripts import tahoe_mkdir diff --git a/src/allmydata/scripts/common.py b/src/allmydata/scripts/common.py index c9fc8e031..d6ca8556d 100644 --- a/src/allmydata/scripts/common.py +++ b/src/allmydata/scripts/common.py @@ -4,29 +4,13 @@ Ported to Python 3. """ -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 -else: - from typing import Union - +from typing import Union, Optional import os, sys, textwrap import codecs from os.path import join import urllib.parse -try: - from typing import Optional - from .types_ import Parameters -except ImportError: - pass - from yaml import ( safe_dump, ) @@ -37,6 +21,8 @@ from allmydata.util.assertutil import precondition from allmydata.util.encodingutil import quote_output, \ quote_local_unicode_path, argv_to_abspath from allmydata.scripts.default_nodedir import _default_nodedir +from .types_ import Parameters + def get_default_nodedir(): return _default_nodedir @@ -59,7 +45,7 @@ class BaseOptions(usage.Options): def opt_version(self): raise usage.UsageError("--version not allowed on subcommands") - description = None # type: Optional[str] + description : Optional[str] = None description_unwrapped = None # type: Optional[str] def __str__(self): @@ -80,10 +66,10 @@ class BaseOptions(usage.Options): class BasedirOptions(BaseOptions): default_nodedir = _default_nodedir - optParameters = [ + optParameters : Parameters = [ ["basedir", "C", None, "Specify which Tahoe base directory should be used. [default: %s]" % quote_local_unicode_path(_default_nodedir)], - ] # type: Parameters + ] def parseArgs(self, basedir=None): # This finds the node-directory option correctly even if we are in a subcommand. @@ -283,9 +269,8 @@ def get_alias(aliases, path_unicode, default): quote_output(alias)) return uri.from_string_dirnode(aliases[alias]).to_string(), path[colon+1:] -def escape_path(path): - # type: (Union[str,bytes]) -> str - u""" +def escape_path(path: Union[str, bytes]) -> str: + """ Return path quoted to US-ASCII, valid URL characters. >>> path = u'/føö/bar/☃' @@ -302,9 +287,4 @@ def escape_path(path): ]), "ascii" ) - # Eventually (i.e. as part of Python 3 port) we want this to always return - # Unicode strings. However, to reduce diff sizes in the short term it'll - # return native string (i.e. bytes) on Python 2. - if PY2: - result = result.encode("ascii").__native__() return result diff --git a/src/allmydata/scripts/common_http.py b/src/allmydata/scripts/common_http.py index 95099a2eb..f138b9c07 100644 --- a/src/allmydata/scripts/common_http.py +++ b/src/allmydata/scripts/common_http.py @@ -1,19 +1,11 @@ """ -Ported to Python 3. 
+Blocking HTTP client APIs.
 """
-from __future__ import unicode_literals
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from future.utils import PY2
-if PY2:
-    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
 
 import os
 from io import BytesIO
-from six.moves import urllib, http_client
-import six
+from http import client as http_client
+import urllib
 
 import allmydata # for __full_version__
 from allmydata.util.encodingutil import quote_output
@@ -51,7 +43,7 @@ class BadResponse(object):
 def do_http(method, url, body=b""):
     if isinstance(body, bytes):
         body = BytesIO(body)
-    elif isinstance(body, six.text_type):
+    elif isinstance(body, str):
         raise TypeError("do_http body must be a bytestring, not unicode")
     else:
         # We must give a Content-Length header to twisted.web, otherwise it
@@ -61,10 +53,17 @@ def do_http(method, url, body=b""):
     assert body.seek
     assert body.read
     scheme, host, port, path = parse_url(url)
+
+    # For testing purposes, allow setting a timeout on HTTP requests. If this
+    # ever becomes a user-facing feature, this should probably be a CLI option?
+    timeout = os.environ.get("__TAHOE_CLI_HTTP_TIMEOUT", None)
+    if timeout is not None:
+        timeout = float(timeout)
+
     if scheme == "http":
-        c = http_client.HTTPConnection(host, port)
+        c = http_client.HTTPConnection(host, port, timeout=timeout, blocksize=65536)
     elif scheme == "https":
-        c = http_client.HTTPSConnection(host, port)
+        c = http_client.HTTPSConnection(host, port, timeout=timeout, blocksize=65536)
     else:
         raise ValueError("unknown scheme '%s', need http or https" % scheme)
     c.putrequest(method, path)
@@ -85,7 +84,7 @@ def do_http(method, url, body=b""):
         return BadResponse(url, err)
 
     while True:
-        data = body.read(8192)
+        data = body.read(65536)
        if not data:
             break
         c.send(data)
@@ -94,16 +93,14 @@ def format_http_success(resp):
-    # ensure_text() shouldn't be necessary when Python 2 is dropped.
     return quote_output(
-        "%s %s" % (resp.status, six.ensure_text(resp.reason)),
+        "%s %s" % (resp.status, resp.reason),
         quotemarks=False)
 
 
 def format_http_error(msg, resp):
-    # ensure_text() shouldn't be necessary when Python 2 is dropped.
return quote_output( - "%s: %s %s\n%s" % (msg, resp.status, six.ensure_text(resp.reason), - six.ensure_text(resp.read())), + "%s: %s %s\n%r" % (msg, resp.status, resp.reason, + resp.read()), quotemarks=False) def check_http_error(resp, stderr): diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index 5d9da518b..7d15b95ec 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -1,25 +1,11 @@ -# Ported to Python 3 - -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - import io import os -try: - from allmydata.scripts.types_ import ( - SubCommands, - Parameters, - Flags, - ) -except ImportError: - pass +from allmydata.scripts.types_ import ( + SubCommands, + Parameters, + Flags, +) from twisted.internet import reactor, defer from twisted.python.usage import UsageError @@ -48,7 +34,7 @@ def write_tac(basedir, nodetype): fileutil.write(os.path.join(basedir, "tahoe-%s.tac" % (nodetype,)), dummy_tac) -WHERE_OPTS = [ +WHERE_OPTS : Parameters = [ ("location", None, None, "Server location to advertise (e.g. tcp:example.org:12345)"), ("port", None, None, @@ -57,29 +43,29 @@ WHERE_OPTS = [ "Hostname to automatically set --location/--port when --listen=tcp"), ("listen", None, "tcp", "Comma-separated list of listener types (tcp,tor,i2p,none)."), -] # type: Parameters +] -TOR_OPTS = [ +TOR_OPTS : Parameters = [ ("tor-control-port", None, None, "Tor's control port endpoint descriptor string (e.g. tcp:127.0.0.1:9051 or unix:/var/run/tor/control)"), ("tor-executable", None, None, "The 'tor' executable to run (default is to search $PATH)."), -] # type: Parameters +] -TOR_FLAGS = [ +TOR_FLAGS : Flags = [ ("tor-launch", None, "Launch a tor instead of connecting to a tor control port."), -] # type: Flags +] -I2P_OPTS = [ +I2P_OPTS : Parameters = [ ("i2p-sam-port", None, None, "I2P's SAM API port endpoint descriptor string (e.g. tcp:127.0.0.1:7656)"), ("i2p-executable", None, None, "(future) The 'i2prouter' executable to run (default is to search $PATH)."), -] # type: Parameters +] -I2P_FLAGS = [ +I2P_FLAGS : Flags = [ ("i2p-launch", None, "(future) Launch an I2P router instead of connecting to a SAM API port."), -] # type: Flags +] def validate_where_options(o): if o['listen'] == "none": @@ -508,11 +494,11 @@ def create_introducer(config): defer.returnValue(0) -subCommands = [ +subCommands : SubCommands = [ ("create-node", None, CreateNodeOptions, "Create a node that acts as a client, server or both."), ("create-client", None, CreateClientOptions, "Create a client node (with storage initially disabled)."), ("create-introducer", None, CreateIntroducerOptions, "Create an introducer node."), -] # type: SubCommands +] dispatch = { "create-node": create_node, diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index 6201ce28f..b6eba842a 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -1,19 +1,8 @@ """ Ported to Python 3. 
""" -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from future.utils import PY2, bchr -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - -try: - from allmydata.scripts.types_ import SubCommands -except ImportError: - pass +from future.utils import bchr import struct, time, os, sys @@ -31,6 +20,7 @@ from allmydata.mutable.common import NeedMoreDataError from allmydata.immutable.layout import ReadBucketProxy from allmydata.util import base32 from allmydata.util.encodingutil import quote_output +from allmydata.scripts.types_ import SubCommands class DumpOptions(BaseOptions): def getSynopsis(self): @@ -1076,9 +1066,9 @@ def do_debug(options): return f(so) -subCommands = [ +subCommands : SubCommands = [ ("debug", None, DebugCommand, "debug subcommands: use 'tahoe debug' for a list."), - ] # type: SubCommands + ] dispatch = { "debug": do_debug, diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index d9fbc1b0a..18387cea5 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -1,28 +1,15 @@ -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - import os, sys -from six.moves import StringIO +from io import StringIO from past.builtins import unicode import six -try: - from allmydata.scripts.types_ import SubCommands -except ImportError: - pass - from twisted.python import usage from twisted.internet import defer, task, threads from allmydata.scripts.common import get_default_nodedir from allmydata.scripts import debug, create_node, cli, \ admin, tahoe_run, tahoe_invite +from allmydata.scripts.types_ import SubCommands from allmydata.util.encodingutil import quote_local_unicode_path, argv_to_unicode from allmydata.util.eliotutil import ( opt_eliot_destination, @@ -47,9 +34,9 @@ if _default_nodedir: NODEDIR_HELP += " [default for most commands: " + quote_local_unicode_path(_default_nodedir) + "]" -process_control_commands = [ +process_control_commands : SubCommands = [ ("run", None, tahoe_run.RunOptions, "run a node without daemonizing"), -] # type: SubCommands +] class Options(usage.Options): diff --git a/src/allmydata/scripts/tahoe_invite.py b/src/allmydata/scripts/tahoe_invite.py index b62d6a463..b44efdeb9 100644 --- a/src/allmydata/scripts/tahoe_invite.py +++ b/src/allmydata/scripts/tahoe_invite.py @@ -1,19 +1,6 @@ """ Ported to Python 3. 
""" -from __future__ import unicode_literals -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - -try: - from allmydata.scripts.types_ import SubCommands -except ImportError: - pass from twisted.python import usage from twisted.internet import defer, reactor @@ -21,6 +8,7 @@ from twisted.internet import defer, reactor from allmydata.util.encodingutil import argv_to_abspath from allmydata.util import jsonbytes as json from allmydata.scripts.common import get_default_nodedir, get_introducer_furl +from allmydata.scripts.types_ import SubCommands from allmydata.client import read_config @@ -112,10 +100,10 @@ def invite(options): print("Completed successfully", file=out) -subCommands = [ +subCommands : SubCommands = [ ("invite", None, InviteOptions, "Invite a new node to this grid"), -] # type: SubCommands +] dispatch = { "invite": invite, diff --git a/src/allmydata/scripts/tahoe_run.py b/src/allmydata/scripts/tahoe_run.py index aaf234b61..ff3ff9efd 100644 --- a/src/allmydata/scripts/tahoe_run.py +++ b/src/allmydata/scripts/tahoe_run.py @@ -104,6 +104,11 @@ class RunOptions(BasedirOptions): " [default: %s]" % quote_local_unicode_path(_default_nodedir)), ] + optFlags = [ + ("allow-stdin-close", None, + 'Do not exit when stdin closes ("tahoe run" otherwise will exit).'), + ] + def parseArgs(self, basedir=None, *twistd_args): # This can't handle e.g. 'tahoe run --reactor=foo', since # '--reactor=foo' looks like an option to the tahoe subcommand, not to @@ -156,6 +161,7 @@ class DaemonizeTheRealService(Service, HookMixin): "running": None, } self.stderr = options.parent.stderr + self._close_on_stdin_close = False if options["allow-stdin-close"] else True def startService(self): @@ -199,10 +205,12 @@ class DaemonizeTheRealService(Service, HookMixin): d = service_factory() def created(srv): - srv.setServiceParent(self.parent) + if self.parent is not None: + srv.setServiceParent(self.parent) # exiting on stdin-closed facilitates cleanup when run # as a subprocess - on_stdin_close(reactor, reactor.stop) + if self._close_on_stdin_close: + on_stdin_close(reactor, reactor.stop) d.addCallback(created) d.addErrback(handle_config_error) d.addBoth(self._call_hook, 'running') @@ -213,11 +221,13 @@ class DaemonizeTheRealService(Service, HookMixin): class DaemonizeTahoeNodePlugin(object): tapname = "tahoenode" - def __init__(self, nodetype, basedir): + def __init__(self, nodetype, basedir, allow_stdin_close): self.nodetype = nodetype self.basedir = basedir + self.allow_stdin_close = allow_stdin_close def makeService(self, so): + so["allow-stdin-close"] = self.allow_stdin_close return DaemonizeTheRealService(self.nodetype, self.basedir, so) @@ -304,7 +314,9 @@ def run(reactor, config, runApp=twistd.runApp): print(config, file=err) print("tahoe %s: usage error from twistd: %s\n" % (config.subcommand_name, ue), file=err) return 1 - twistd_config.loadedPlugins = {"DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir)} + twistd_config.loadedPlugins = { + "DaemonizeTahoeNode": DaemonizeTahoeNodePlugin(nodetype, basedir, config["allow-stdin-close"]) + } # our own pid-style file contains PID and process creation time pidfile = FilePath(get_pidfile(config['basedir'])) diff --git a/src/allmydata/storage/common.py 
b/src/allmydata/storage/common.py index 17a3f41b7..f6d986f85 100644 --- a/src/allmydata/storage/common.py +++ b/src/allmydata/storage/common.py @@ -39,6 +39,10 @@ def si_b2a(storageindex): def si_a2b(ascii_storageindex): return base32.a2b(ascii_storageindex) +def si_to_human_readable(storageindex: bytes) -> str: + """Create human-readable string of storage index.""" + return str(base32.b2a(storageindex), "ascii") + def storage_index_to_dir(storageindex): """Convert storage index to directory path. diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 90bda7fc0..9f5d6cce2 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -4,12 +4,27 @@ HTTP client that talks to the HTTP storage server. from __future__ import annotations -from typing import Union, Optional, Sequence, Mapping, BinaryIO + +from typing import ( + Union, + Optional, + Sequence, + Mapping, + BinaryIO, + cast, + TypedDict, + Set, + Dict, + Callable, + ClassVar, +) from base64 import b64encode from io import BytesIO from os import SEEK_END from attrs import define, asdict, frozen, field +from eliot import start_action, register_exception_extractor +from eliot.twisted import DeferredContext # TODO Make sure to import Python version? from cbor2 import loads, dumps @@ -18,8 +33,8 @@ from collections_extended import RangeMap from werkzeug.datastructures import Range, ContentRange from twisted.web.http_headers import Headers from twisted.web import http -from twisted.web.iweb import IPolicyForHTTPS -from twisted.internet.defer import inlineCallbacks, returnValue, fail, Deferred, succeed +from twisted.web.iweb import IPolicyForHTTPS, IResponse, IAgent +from twisted.internet.defer import Deferred, succeed from twisted.internet.interfaces import ( IOpenSSLClientConnectionCreator, IReactorTime, @@ -33,7 +48,6 @@ import treq from treq.client import HTTPClient from treq.testing import StubTreq from OpenSSL import SSL -from cryptography.hazmat.bindings.openssl.binding import Binding from werkzeug.http import parse_content_range_header from .http_common import ( @@ -42,12 +56,20 @@ from .http_common import ( get_content_type, CBOR_MIME_TYPE, get_spki_hash, + response_is_not_html, ) -from .common import si_b2a +from ..interfaces import VersionMessage +from .common import si_b2a, si_to_human_readable from ..util.hashutil import timing_safe_compare from ..util.deferredutil import async_to_deferred +from ..util.tor_provider import _Provider as TorProvider -_OPENSSL = Binding().lib +try: + from txtorcon import Tor # type: ignore +except ImportError: + + class Tor: # type: ignore[no-redef] + pass def _encode_si(si): # type: (bytes) -> str @@ -63,6 +85,9 @@ class ClientException(Exception): self.code = code +register_exception_extractor(ClientException, lambda e: {"response_code": e.code}) + + # Schemas for server responses. # # Tags are of the form #6.nnn, where the number is documented at @@ -70,15 +95,14 @@ class ClientException(Exception): # indicates a set. _SCHEMAS = { "get_version": Schema( + # Note that the single-quoted (`'`) string keys in this schema + # represent *byte* strings - per the CDDL specification. Text strings + # are represented using strings with *double* quotes (`"`). 
""" response = {'http://allmydata.org/tahoe/protocols/storage/v1' => { 'maximum-immutable-share-size' => uint 'maximum-mutable-share-size' => uint 'available-space' => uint - 'tolerates-immutable-read-overrun' => bool - 'delete-mutable-shares-with-zero-length-writev' => bool - 'fills-holes-with-zero-bytes' => bool - 'prevents-read-past-end-of-share-data' => bool } 'application-version' => bstr } @@ -156,15 +180,24 @@ def limited_content( This will time out if no data is received for 60 seconds; so long as a trickle of data continues to arrive, it will continue to run. """ - d = succeed(None) - timeout = clock.callLater(60, d.cancel) + result_deferred = succeed(None) + + # Sadly, addTimeout() won't work because we need access to the IDelayedCall + # in order to reset it on each data chunk received. + timeout = clock.callLater(60, result_deferred.cancel) collector = _LengthLimitedCollector(max_length, timeout) + with start_action( + action_type="allmydata:storage:http-client:limited-content", + max_length=max_length, + ).context(): + d = DeferredContext(result_deferred) + # Make really sure everything gets called in Deferred context, treq might # call collector directly... d.addCallback(lambda _: treq.collect(response, collector)) - def done(_): + def done(_: object) -> BytesIO: timeout.cancel() collector.f.seek(0) return collector.f @@ -174,7 +207,8 @@ def limited_content( timeout.cancel() return f - return d.addCallbacks(done, failed) + result = d.addCallbacks(done, failed) + return result.addActionFinish() @define @@ -231,11 +265,11 @@ class _TLSContextFactory(CertificateOptions): # not the usual TLS concerns about invalid CAs or revoked # certificates. things_are_ok = ( - _OPENSSL.X509_V_OK, - _OPENSSL.X509_V_ERR_CERT_NOT_YET_VALID, - _OPENSSL.X509_V_ERR_CERT_HAS_EXPIRED, - _OPENSSL.X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT, - _OPENSSL.X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN, + SSL.X509VerificationCodes.OK, + SSL.X509VerificationCodes.ERR_CERT_NOT_YET_VALID, + SSL.X509VerificationCodes.ERR_CERT_HAS_EXPIRED, + SSL.X509VerificationCodes.ERR_DEPTH_ZERO_SELF_SIGNED_CERT, + SSL.X509VerificationCodes.ERR_SELF_SIGNED_CERT_IN_CHAIN, ) # TODO can we do this once instead of multiple times? if errno in things_are_ok and timing_safe_compare( @@ -276,18 +310,30 @@ class _StorageClientHTTPSPolicy: ) -@define(hash=True) -class StorageClient(object): +@define +class StorageClientFactory: """ - Low-level HTTP client that talks to the HTTP storage server. + Create ``StorageClient`` instances, using appropriate + ``twisted.web.iweb.IAgent`` for different connection methods: normal TCP, + Tor, and eventually I2P. + + There is some caching involved since there might be shared setup work, e.g. + connecting to the local Tor service only needs to happen once. """ - # If set, we're doing unit testing and we should call this with - # HTTPConnectionPool we create. - TEST_MODE_REGISTER_HTTP_POOL = None + _default_connection_handlers: dict[str, str] + _tor_provider: Optional[TorProvider] + # Cache the Tor instance created by the provider, if relevant. + _tor_instance: Optional[Tor] = None + + # If set, we're doing unit testing and we should call this with any + # HTTPConnectionPool that gets passed/created to ``create_agent()``. + TEST_MODE_REGISTER_HTTP_POOL: ClassVar[ + Optional[Callable[[HTTPConnectionPool], None]] + ] = None @classmethod - def start_test_mode(cls, callback): + def start_test_mode(cls, callback: Callable[[HTTPConnectionPool], None]) -> None: """Switch to testing mode. 
In testing mode we register the pool with test system using the given @@ -302,44 +348,90 @@ class StorageClient(object): """Stop testing mode.""" cls.TEST_MODE_REGISTER_HTTP_POOL = None - # The URL is a HTTPS URL ("https://..."). To construct from a NURL, use - # ``StorageClient.from_nurl()``. + async def _create_agent( + self, + nurl: DecodedURL, + reactor: object, + tls_context_factory: IPolicyForHTTPS, + pool: HTTPConnectionPool, + ) -> IAgent: + """Create a new ``IAgent``, possibly using Tor.""" + if self.TEST_MODE_REGISTER_HTTP_POOL is not None: + self.TEST_MODE_REGISTER_HTTP_POOL(pool) + + # TODO default_connection_handlers should really be an object, not a + # dict, so we can ask "is this using Tor" without poking at a + # dictionary with arbitrary strings... See + # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/4032 + handler = self._default_connection_handlers["tcp"] + + if handler == "tcp": + return Agent(reactor, tls_context_factory, pool=pool) + if handler == "tor" or nurl.scheme == "pb+tor": + assert self._tor_provider is not None + if self._tor_instance is None: + self._tor_instance = await self._tor_provider.get_tor_instance(reactor) + return self._tor_instance.web_agent( + pool=pool, tls_context_factory=tls_context_factory + ) + else: + raise RuntimeError(f"Unsupported tcp connection handler: {handler}") + + async def create_storage_client( + self, + nurl: DecodedURL, + reactor: IReactorTime, + pool: Optional[HTTPConnectionPool] = None, + ) -> StorageClient: + """Create a new ``StorageClient`` for the given NURL.""" + assert nurl.fragment == "v=1" + assert nurl.scheme in ("pb", "pb+tor") + if pool is None: + pool = HTTPConnectionPool(reactor) + pool.maxPersistentPerHost = 10 + + certificate_hash = nurl.user.encode("ascii") + agent = await self._create_agent( + nurl, + reactor, + _StorageClientHTTPSPolicy(expected_spki_hash=certificate_hash), + pool, + ) + treq_client = HTTPClient(agent) + https_url = DecodedURL().replace(scheme="https", host=nurl.host, port=nurl.port) + swissnum = nurl.path[0].encode("ascii") + response_check = lambda _: None + if self.TEST_MODE_REGISTER_HTTP_POOL is not None: + response_check = response_is_not_html + + return StorageClient( + https_url, + swissnum, + treq_client, + pool, + reactor, + response_check, + ) + + +@define(hash=True) +class StorageClient(object): + """ + Low-level HTTP client that talks to the HTTP storage server. + + Create using a ``StorageClientFactory`` instance. + """ + + # The URL should be a HTTPS URL ("https://...") _base_url: DecodedURL _swissnum: bytes _treq: Union[treq, StubTreq, HTTPClient] + _pool: HTTPConnectionPool _clock: IReactorTime + # Are we running unit tests? + _analyze_response: Callable[[IResponse], None] = lambda _: None - @classmethod - def from_nurl( - cls, - nurl: DecodedURL, - reactor, - ) -> StorageClient: - """ - Create a ``StorageClient`` for the given NURL. 
- """ - assert nurl.fragment == "v=1" - assert nurl.scheme == "pb" - swissnum = nurl.path[0].encode("ascii") - certificate_hash = nurl.user.encode("ascii") - pool = HTTPConnectionPool(reactor) - pool.maxPersistentPerHost = 20 - - if cls.TEST_MODE_REGISTER_HTTP_POOL is not None: - cls.TEST_MODE_REGISTER_HTTP_POOL(pool) - - treq_client = HTTPClient( - Agent( - reactor, - _StorageClientHTTPSPolicy(expected_spki_hash=certificate_hash), - pool=pool, - ) - ) - - https_url = DecodedURL().replace(scheme="https", host=nurl.host, port=nurl.port) - return cls(https_url, swissnum, treq_client, reactor) - - def relative_url(self, path): + def relative_url(self, path: str) -> DecodedURL: """Get a URL relative to the base URL.""" return self._base_url.click(path) @@ -353,19 +445,20 @@ class StorageClient(object): ) return headers - def request( + @async_to_deferred + async def request( self, - method, - url, - lease_renew_secret=None, - lease_cancel_secret=None, - upload_secret=None, - write_enabler_secret=None, - headers=None, - message_to_serialize=None, + method: str, + url: DecodedURL, + lease_renew_secret: Optional[bytes] = None, + lease_cancel_secret: Optional[bytes] = None, + upload_secret: Optional[bytes] = None, + write_enabler_secret: Optional[bytes] = None, + headers: Optional[Headers] = None, + message_to_serialize: object = None, timeout: float = 60, **kwargs, - ): + ) -> IResponse: """ Like ``treq.request()``, but with optional secrets that get translated into corresponding HTTP headers. @@ -375,6 +468,41 @@ class StorageClient(object): Default timeout is 60 seconds. """ + with start_action( + action_type="allmydata:storage:http-client:request", + method=method, + url=url.to_text(), + timeout=timeout, + ) as ctx: + response = await self._request( + method, + url, + lease_renew_secret, + lease_cancel_secret, + upload_secret, + write_enabler_secret, + headers, + message_to_serialize, + timeout, + **kwargs, + ) + ctx.add_success_fields(response_code=response.code) + return response + + async def _request( + self, + method: str, + url: DecodedURL, + lease_renew_secret: Optional[bytes] = None, + lease_cancel_secret: Optional[bytes] = None, + upload_secret: Optional[bytes] = None, + write_enabler_secret: Optional[bytes] = None, + headers: Optional[Headers] = None, + message_to_serialize: object = None, + timeout: float = 60, + **kwargs, + ) -> IResponse: + """The implementation of request().""" headers = self._get_headers(headers) # Add secrets: @@ -405,28 +533,39 @@ class StorageClient(object): kwargs["data"] = dumps(message_to_serialize) headers.addRawHeader("Content-Type", CBOR_MIME_TYPE) - return self._treq.request( + response = await self._treq.request( method, url, headers=headers, timeout=timeout, **kwargs ) + self._analyze_response(response) - def decode_cbor(self, response, schema: Schema): + return response + + async def decode_cbor(self, response: IResponse, schema: Schema) -> object: """Given HTTP response, return decoded CBOR body.""" - - def got_content(f: BinaryIO): - data = f.read() - schema.validate_cbor(data) - return loads(data) - - if response.code > 199 and response.code < 300: - content_type = get_content_type(response.headers) - if content_type == CBOR_MIME_TYPE: - return limited_content(response, self._clock).addCallback(got_content) + with start_action(action_type="allmydata:storage:http-client:decode-cbor"): + if response.code > 199 and response.code < 300: + content_type = get_content_type(response.headers) + if content_type == CBOR_MIME_TYPE: + f = await 
limited_content(response, self._clock) + data = f.read() + schema.validate_cbor(data) + return loads(data) + else: + raise ClientException( + -1, + "Server didn't send CBOR, content type is {}".format( + content_type + ), + ) else: - raise ClientException(-1, "Server didn't send CBOR") - else: - return treq.content(response).addCallback( - lambda data: fail(ClientException(response.code, response.phrase, data)) - ) + data = ( + await limited_content(response, self._clock, max_length=10_000) + ).read() + raise ClientException(response.code, response.phrase, data) + + def shutdown(self) -> Deferred: + """Shutdown any connections.""" + return self._pool.closeCachedConnections() @define(hash=True) @@ -437,32 +576,65 @@ class StorageClientGeneral(object): _client: StorageClient - @inlineCallbacks - def get_version(self): + @async_to_deferred + async def get_version(self) -> VersionMessage: """ Return the version metadata for the server. """ - url = self._client.relative_url("/storage/v1/version") - response = yield self._client.request("GET", url) - decoded_response = yield self._client.decode_cbor( - response, _SCHEMAS["get_version"] - ) - returnValue(decoded_response) + with start_action( + action_type="allmydata:storage:http-client:get-version", + ): + return await self._get_version() - @inlineCallbacks - def add_or_renew_lease( + async def _get_version(self) -> VersionMessage: + """Implementation of get_version().""" + url = self._client.relative_url("/storage/v1/version") + response = await self._client.request("GET", url) + decoded_response = cast( + Dict[bytes, object], + await self._client.decode_cbor(response, _SCHEMAS["get_version"]), + ) + # Add some features we know are true because the HTTP API + # specification requires them and because other parts of the storage + # client implementation assumes they will be present. + cast( + Dict[bytes, object], + decoded_response[b"http://allmydata.org/tahoe/protocols/storage/v1"], + ).update( + { + b"tolerates-immutable-read-overrun": True, + b"delete-mutable-shares-with-zero-length-writev": True, + b"fills-holes-with-zero-bytes": True, + b"prevents-read-past-end-of-share-data": True, + } + ) + return decoded_response + + @async_to_deferred + async def add_or_renew_lease( self, storage_index: bytes, renew_secret: bytes, cancel_secret: bytes - ) -> Deferred[None]: + ) -> None: """ Add or renew a lease. If the renewal secret matches an existing lease, it is renewed. Otherwise a new lease is added. """ + with start_action( + action_type="allmydata:storage:http-client:add-or-renew-lease", + storage_index=si_to_human_readable(storage_index), + ): + return await self._add_or_renew_lease( + storage_index, renew_secret, cancel_secret + ) + + async def _add_or_renew_lease( + self, storage_index: bytes, renew_secret: bytes, cancel_secret: bytes + ) -> None: url = self._client.relative_url( "/storage/v1/lease/{}".format(_encode_si(storage_index)) ) - response = yield self._client.request( + response = await self._client.request( "PUT", url, lease_renew_secret=renew_secret, @@ -487,15 +659,15 @@ class UploadProgress(object): required: RangeMap -@inlineCallbacks -def read_share_chunk( +@async_to_deferred +async def read_share_chunk( client: StorageClient, share_type: str, storage_index: bytes, share_number: int, offset: int, length: int, -) -> Deferred[bytes]: +) -> bytes: """ Download a chunk of data from a share. 
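(Reviewer note: ``read_share_chunk``, continued in the next hunk, leans on the HTTP Range/Content-Range round trip. Below is a minimal standalone sketch of the arithmetic it validates, using werkzeug, which this module already imports; the offsets are invented for the example.)

    from werkzeug.http import parse_content_range_header

    offset, length = 100, 50
    # The client asks for an inclusive byte range...
    request_range = "bytes={}-{}".format(offset, offset + length - 1)
    assert request_range == "bytes=100-149"

    # ...and a well-behaved server answers 206 with a matching Content-Range.
    content_range = parse_content_range_header("bytes 100-149/*")
    assert content_range is not None and content_range.units == "bytes"
    # werkzeug normalizes to an exclusive stop, so stop - start is the length;
    # a larger value would mean the server sent more than we asked for.
    supposed_length = content_range.stop - content_range.start
    assert supposed_length == length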
@@ -516,7 +688,7 @@ def read_share_chunk( # The default 60 second timeout is for getting the response, so it doesn't # include the time it takes to download the body... so we will will deal # with that later, via limited_content(). - response = yield client.request( + response = await client.request( "GET", url, headers=Headers( @@ -530,6 +702,12 @@ def read_share_chunk( if response.code == http.NO_CONTENT: return b"" + content_type = get_content_type(response.headers) + if content_type != "application/octet-stream": + raise ValueError( + f"Content-type was wrong: {content_type}, should be application/octet-stream" + ) + if response.code == http.PARTIAL_CONTENT: content_range = parse_content_range_header( response.headers.getRawHeaders("content-range")[0] or "" @@ -547,7 +725,7 @@ def read_share_chunk( raise ValueError("Server sent more than we asked for?!") # It might also send less than we asked for. That's (probably) OK, e.g. # if we went past the end of the file. - body = yield limited_content(response, client._clock, supposed_length) + body = await limited_content(response, client._clock, supposed_length) body.seek(0, SEEK_END) actual_length = body.tell() if actual_length != supposed_length: @@ -573,7 +751,7 @@ async def advise_corrupt_share( storage_index: bytes, share_number: int, reason: str, -): +) -> None: assert isinstance(reason, str) url = client.relative_url( "/storage/v1/{}/{}/{}/corrupt".format( @@ -598,16 +776,16 @@ class StorageClientImmutables(object): _client: StorageClient - @inlineCallbacks - def create( + @async_to_deferred + async def create( self, - storage_index, - share_numbers, - allocated_size, - upload_secret, - lease_renew_secret, - lease_cancel_secret, - ): # type: (bytes, set[int], int, bytes, bytes, bytes) -> Deferred[ImmutableCreateResult] + storage_index: bytes, + share_numbers: set[int], + allocated_size: int, + upload_secret: bytes, + lease_renew_secret: bytes, + lease_cancel_secret: bytes, + ) -> ImmutableCreateResult: """ Create a new storage index for an immutable. @@ -621,12 +799,41 @@ class StorageClientImmutables(object): Result fires when creating the storage index succeeded, if creating the storage index failed the result will fire with an exception. 
""" + with start_action( + action_type="allmydata:storage:http-client:immutable:create", + storage_index=si_to_human_readable(storage_index), + share_numbers=share_numbers, + allocated_size=allocated_size, + ) as ctx: + result = await self._create( + storage_index, + share_numbers, + allocated_size, + upload_secret, + lease_renew_secret, + lease_cancel_secret, + ) + ctx.add_success_fields( + already_have=result.already_have, allocated=result.allocated + ) + return result + + async def _create( + self, + storage_index: bytes, + share_numbers: set[int], + allocated_size: int, + upload_secret: bytes, + lease_renew_secret: bytes, + lease_cancel_secret: bytes, + ) -> ImmutableCreateResult: + """Implementation of create().""" url = self._client.relative_url( "/storage/v1/immutable/" + _encode_si(storage_index) ) message = {"share-numbers": share_numbers, "allocated-size": allocated_size} - response = yield self._client.request( + response = await self._client.request( "POST", url, lease_renew_secret=lease_renew_secret, @@ -634,27 +841,37 @@ class StorageClientImmutables(object): upload_secret=upload_secret, message_to_serialize=message, ) - decoded_response = yield self._client.decode_cbor( - response, _SCHEMAS["allocate_buckets"] + decoded_response = cast( + Mapping[str, Set[int]], + await self._client.decode_cbor(response, _SCHEMAS["allocate_buckets"]), ) - returnValue( - ImmutableCreateResult( - already_have=decoded_response["already-have"], - allocated=decoded_response["allocated"], - ) + return ImmutableCreateResult( + already_have=decoded_response["already-have"], + allocated=decoded_response["allocated"], ) - @inlineCallbacks - def abort_upload( + @async_to_deferred + async def abort_upload( self, storage_index: bytes, share_number: int, upload_secret: bytes - ) -> Deferred[None]: + ) -> None: """Abort the upload.""" + with start_action( + action_type="allmydata:storage:http-client:immutable:abort-upload", + storage_index=si_to_human_readable(storage_index), + share_number=share_number, + ): + return await self._abort_upload(storage_index, share_number, upload_secret) + + async def _abort_upload( + self, storage_index: bytes, share_number: int, upload_secret: bytes + ) -> None: + """Implementation of ``abort_upload()``.""" url = self._client.relative_url( "/storage/v1/immutable/{}/{}/abort".format( _encode_si(storage_index), share_number ) ) - response = yield self._client.request( + response = await self._client.request( "PUT", url, upload_secret=upload_secret, @@ -667,10 +884,15 @@ class StorageClientImmutables(object): response.code, ) - @inlineCallbacks - def write_share_chunk( - self, storage_index, share_number, upload_secret, offset, data - ): # type: (bytes, int, bytes, int, bytes) -> Deferred[UploadProgress] + @async_to_deferred + async def write_share_chunk( + self, + storage_index: bytes, + share_number: int, + upload_secret: bytes, + offset: int, + data: bytes, + ) -> UploadProgress: """ Upload a chunk of data for a specific share. @@ -683,12 +905,34 @@ class StorageClientImmutables(object): whether the _complete_ share (i.e. all chunks, not just this one) has been uploaded. 
""" + with start_action( + action_type="allmydata:storage:http-client:immutable:write-share-chunk", + storage_index=si_to_human_readable(storage_index), + share_number=share_number, + offset=offset, + data_len=len(data), + ) as ctx: + result = await self._write_share_chunk( + storage_index, share_number, upload_secret, offset, data + ) + ctx.add_success_fields(finished=result.finished) + return result + + async def _write_share_chunk( + self, + storage_index: bytes, + share_number: int, + upload_secret: bytes, + offset: int, + data: bytes, + ) -> UploadProgress: + """Implementation of ``write_share_chunk()``.""" url = self._client.relative_url( "/storage/v1/immutable/{}/{}".format( _encode_si(storage_index), share_number ) ) - response = yield self._client.request( + response = await self._client.request( "PATCH", url, upload_secret=upload_secret, @@ -712,52 +956,84 @@ class StorageClientImmutables(object): raise ClientException( response.code, ) - body = yield self._client.decode_cbor( - response, _SCHEMAS["immutable_write_share_chunk"] + body = cast( + Mapping[str, Sequence[Mapping[str, int]]], + await self._client.decode_cbor( + response, _SCHEMAS["immutable_write_share_chunk"] + ), ) remaining = RangeMap() for chunk in body["required"]: remaining.set(True, chunk["begin"], chunk["end"]) - returnValue(UploadProgress(finished=finished, required=remaining)) + return UploadProgress(finished=finished, required=remaining) - def read_share_chunk( - self, storage_index, share_number, offset, length - ): # type: (bytes, int, int, int) -> Deferred[bytes] + @async_to_deferred + async def read_share_chunk( + self, storage_index: bytes, share_number: int, offset: int, length: int + ) -> bytes: """ Download a chunk of data from a share. """ - return read_share_chunk( - self._client, "immutable", storage_index, share_number, offset, length - ) + with start_action( + action_type="allmydata:storage:http-client:immutable:read-share-chunk", + storage_index=si_to_human_readable(storage_index), + share_number=share_number, + offset=offset, + length=length, + ) as ctx: + result = await read_share_chunk( + self._client, "immutable", storage_index, share_number, offset, length + ) + ctx.add_success_fields(data_len=len(result)) + return result - @inlineCallbacks - def list_shares(self, storage_index: bytes) -> Deferred[set[int]]: + @async_to_deferred + async def list_shares(self, storage_index: bytes) -> Set[int]: """ Return the set of shares for a given storage index. 
""" + with start_action( + action_type="allmydata:storage:http-client:immutable:list-shares", + storage_index=si_to_human_readable(storage_index), + ) as ctx: + result = await self._list_shares(storage_index) + ctx.add_success_fields(shares=result) + return result + + async def _list_shares(self, storage_index: bytes) -> Set[int]: + """Implementation of ``list_shares()``.""" url = self._client.relative_url( "/storage/v1/immutable/{}/shares".format(_encode_si(storage_index)) ) - response = yield self._client.request( + response = await self._client.request( "GET", url, ) if response.code == http.OK: - body = yield self._client.decode_cbor(response, _SCHEMAS["list_shares"]) - returnValue(set(body)) + return cast( + Set[int], + await self._client.decode_cbor(response, _SCHEMAS["list_shares"]), + ) else: raise ClientException(response.code) - def advise_corrupt_share( + @async_to_deferred + async def advise_corrupt_share( self, storage_index: bytes, share_number: int, reason: str, - ): + ) -> None: """Indicate a share has been corrupted, with a human-readable message.""" - return advise_corrupt_share( - self._client, "immutable", storage_index, share_number, reason - ) + with start_action( + action_type="allmydata:storage:http-client:immutable:advise-corrupt-share", + storage_index=si_to_human_readable(storage_index), + share_number=share_number, + reason=reason, + ): + await advise_corrupt_share( + self._client, "immutable", storage_index, share_number, reason + ) @frozen @@ -814,6 +1090,13 @@ class ReadTestWriteResult: reads: Mapping[int, Sequence[bytes]] +# Result type for mutable read/test/write HTTP response. Can't just use +# dict[int,list[bytes]] because on Python 3.8 that will error out. +MUTABLE_RTW = TypedDict( + "MUTABLE_RTW", {"success": bool, "data": Mapping[int, Sequence[bytes]]} +) + + @frozen class StorageClientMutables: """ @@ -841,6 +1124,29 @@ class StorageClientMutables: Given a mapping between share numbers and test/write vectors, the tests are done and if they are valid the writes are done. """ + with start_action( + action_type="allmydata:storage:http-client:mutable:read-test-write", + storage_index=si_to_human_readable(storage_index), + ): + return await self._read_test_write_chunks( + storage_index, + write_enabler_secret, + lease_renew_secret, + lease_cancel_secret, + testwrite_vectors, + read_vector, + ) + + async def _read_test_write_chunks( + self, + storage_index: bytes, + write_enabler_secret: bytes, + lease_renew_secret: bytes, + lease_cancel_secret: bytes, + testwrite_vectors: dict[int, TestWriteVectors], + read_vector: list[ReadVector], + ) -> ReadTestWriteResult: + """Implementation of ``read_test_write_chunks()``.""" url = self._client.relative_url( "/storage/v1/mutable/{}/read-test-write".format(_encode_si(storage_index)) ) @@ -860,50 +1166,83 @@ class StorageClientMutables: message_to_serialize=message, ) if response.code == http.OK: - result = await self._client.decode_cbor( - response, _SCHEMAS["mutable_read_test_write"] + result = cast( + MUTABLE_RTW, + await self._client.decode_cbor( + response, _SCHEMAS["mutable_read_test_write"] + ), ) return ReadTestWriteResult(success=result["success"], reads=result["data"]) else: raise ClientException(response.code, (await response.content())) - def read_share_chunk( + @async_to_deferred + async def read_share_chunk( self, storage_index: bytes, share_number: int, offset: int, length: int, - ) -> Deferred[bytes]: + ) -> bytes: """ Download a chunk of data from a share. 
""" - return read_share_chunk( - self._client, "mutable", storage_index, share_number, offset, length - ) + with start_action( + action_type="allmydata:storage:http-client:mutable:read-share-chunk", + storage_index=si_to_human_readable(storage_index), + share_number=share_number, + offset=offset, + length=length, + ) as ctx: + result = await read_share_chunk( + self._client, "mutable", storage_index, share_number, offset, length + ) + ctx.add_success_fields(data_len=len(result)) + return result @async_to_deferred - async def list_shares(self, storage_index: bytes) -> set[int]: + async def list_shares(self, storage_index: bytes) -> Set[int]: """ List the share numbers for a given storage index. """ + with start_action( + action_type="allmydata:storage:http-client:mutable:list-shares", + storage_index=si_to_human_readable(storage_index), + ) as ctx: + result = await self._list_shares(storage_index) + ctx.add_success_fields(shares=result) + return result + + async def _list_shares(self, storage_index: bytes) -> Set[int]: + """Implementation of ``list_shares()``.""" url = self._client.relative_url( "/storage/v1/mutable/{}/shares".format(_encode_si(storage_index)) ) response = await self._client.request("GET", url) if response.code == http.OK: - return await self._client.decode_cbor( - response, _SCHEMAS["mutable_list_shares"] + return cast( + Set[int], + await self._client.decode_cbor( + response, _SCHEMAS["mutable_list_shares"] + ), ) else: raise ClientException(response.code) - def advise_corrupt_share( + @async_to_deferred + async def advise_corrupt_share( self, storage_index: bytes, share_number: int, reason: str, - ): + ) -> None: """Indicate a share has been corrupted, with a human-readable message.""" - return advise_corrupt_share( - self._client, "mutable", storage_index, share_number, reason - ) + with start_action( + action_type="allmydata:storage:http-client:mutable:advise-corrupt-share", + storage_index=si_to_human_readable(storage_index), + share_number=share_number, + reason=reason, + ): + await advise_corrupt_share( + self._client, "mutable", storage_index, share_number, reason + ) diff --git a/src/allmydata/storage/http_common.py b/src/allmydata/storage/http_common.py index 123ce403b..650d905e9 100644 --- a/src/allmydata/storage/http_common.py +++ b/src/allmydata/storage/http_common.py @@ -12,6 +12,7 @@ from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat from werkzeug.http import parse_options_header from twisted.web.http_headers import Headers +from twisted.web.iweb import IResponse CBOR_MIME_TYPE = "application/cbor" @@ -22,13 +23,25 @@ def get_content_type(headers: Headers) -> Optional[str]: Returns ``None`` if no content-type was set. """ - values = headers.getRawHeaders("content-type") or [None] + values = headers.getRawHeaders("content-type", [None]) or [None] content_type = parse_options_header(values[0])[0] or None return content_type +def response_is_not_html(response: IResponse) -> None: + """ + During tests, this is registered so we can ensure the web server + doesn't give us text/html. + + HTML is never correct except in 404, but it's the default for + Twisted's web server so we assert nothing unexpected happened. 
+ """ + if response.code != 404: + assert get_content_type(response.headers) != "text/html" + + def swissnum_auth_header(swissnum: bytes) -> bytes: - """Return value for ``Authentication`` header.""" + """Return value for ``Authorization`` header.""" return b"Tahoe-LAFS " + b64encode(swissnum).strip() diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index fd7fd1187..c63a4ca08 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -4,7 +4,7 @@ HTTP server for storage. from __future__ import annotations -from typing import Dict, List, Set, Tuple, Any, Callable, Union, cast +from typing import Any, Callable, Union, cast, Optional from functools import wraps from base64 import b64decode import binascii @@ -12,6 +12,7 @@ from tempfile import TemporaryFile from os import SEEK_END, SEEK_SET import mmap +from eliot import start_action from cryptography.x509 import Certificate as CryptoCertificate from zope.interface import implementer from klein import Klein @@ -67,14 +68,14 @@ class ClientSecretsException(Exception): def _extract_secrets( - header_values, required_secrets -): # type: (List[str], Set[Secrets]) -> Dict[Secrets, bytes] + header_values: list[str], required_secrets: set[Secrets] +) -> dict[Secrets, bytes]: """ Given list of values of ``X-Tahoe-Authorization`` headers, and required secrets, return dictionary mapping secrets to decoded values. If too few secrets were given, or too many, a ``ClientSecretsException`` is - raised. + raised; its text is sent in the HTTP response. """ string_key_to_enum = {e.value: e for e in Secrets} result = {} @@ -83,6 +84,10 @@ def _extract_secrets( string_key, string_value = header_value.strip().split(" ", 1) key = string_key_to_enum[string_key] value = b64decode(string_value) + if value == b"": + raise ClientSecretsException( + "Failed to decode secret {}".format(string_key) + ) if key in (Secrets.LEASE_CANCEL, Secrets.LEASE_RENEW) and len(value) != 32: raise ClientSecretsException("Lease secrets must be 32 bytes long") result[key] = value @@ -90,37 +95,68 @@ def _extract_secrets( raise ClientSecretsException("Bad header value(s): {}".format(header_values)) if result.keys() != required_secrets: raise ClientSecretsException( - "Expected {} secrets, got {}".format(required_secrets, result.keys()) + "Expected {} in X-Tahoe-Authorization headers, got {}".format( + [r.value for r in required_secrets], list(result.keys()) + ) ) return result def _authorization_decorator(required_secrets): """ - Check the ``Authorization`` header, and extract ``X-Tahoe-Authorization`` - headers and pass them in. + 1. Check the ``Authorization`` header matches server swissnum. + 2. Extract ``X-Tahoe-Authorization`` headers and pass them in. + 3. Log the request and response. 
""" def decorator(f): @wraps(f) def route(self, request, *args, **kwargs): - if not timing_safe_compare( - request.requestHeaders.getRawHeaders("Authorization", [""])[0].encode( - "utf-8" - ), - swissnum_auth_header(self._swissnum), - ): - request.setResponseCode(http.UNAUTHORIZED) - return b"" - authorization = request.requestHeaders.getRawHeaders( - "X-Tahoe-Authorization", [] - ) - try: - secrets = _extract_secrets(authorization, required_secrets) - except ClientSecretsException: - request.setResponseCode(http.BAD_REQUEST) - return b"Missing required secrets" - return f(self, request, secrets, *args, **kwargs) + # Don't set text/html content type by default: + request.defaultContentType = None + + with start_action( + action_type="allmydata:storage:http-server:handle-request", + method=request.method, + path=request.path, + ) as ctx: + try: + # Check Authorization header: + try: + auth_header = request.requestHeaders.getRawHeaders( + "Authorization", [""] + )[0].encode("utf-8") + except UnicodeError: + raise _HTTPError(http.BAD_REQUEST, "Bad Authorization header") + if not timing_safe_compare( + auth_header, + swissnum_auth_header(self._swissnum), + ): + raise _HTTPError( + http.UNAUTHORIZED, "Wrong Authorization header" + ) + + # Check secrets: + authorization = request.requestHeaders.getRawHeaders( + "X-Tahoe-Authorization", [] + ) + try: + secrets = _extract_secrets(authorization, required_secrets) + except ClientSecretsException as e: + raise _HTTPError(http.BAD_REQUEST, str(e)) + + # Run the business logic: + result = f(self, request, secrets, *args, **kwargs) + except _HTTPError as e: + # This isn't an error necessarily for logging purposes, + # it's an implementation detail, an easier way to set + # response codes. + ctx.add_success_fields(response_code=e.code) + ctx.finish() + raise + else: + ctx.add_success_fields(response_code=request.code) + return result return route @@ -173,7 +209,7 @@ class UploadsInProgress(object): _uploads: dict[bytes, StorageIndexUploads] = Factory(dict) # Map BucketWriter to (storage index, share number) - _bucketwriters: dict[BucketWriter, Tuple[bytes, int]] = Factory(dict) + _bucketwriters: dict[BucketWriter, tuple[bytes, int]] = Factory(dict) def add_write_bucket( self, @@ -248,8 +284,10 @@ class _HTTPError(Exception): Raise from ``HTTPServer`` endpoint to return the given HTTP response code. """ - def __init__(self, code: int): + def __init__(self, code: int, body: Optional[str] = None): + Exception.__init__(self, (code, body)) self.code = code + self.body = body # CDDL schemas. @@ -273,7 +311,7 @@ _SCHEMAS = { "advise_corrupt_share": Schema( """ request = { - reason: tstr + reason: tstr .size (1..32765) } """ ), @@ -348,13 +386,16 @@ class _ReadRangeProducer: a request. """ - request: Request + request: Optional[Request] read_data: ReadData - result: Deferred + result: Optional[Deferred[bytes]] start: int remaining: int def resumeProducing(self): + if self.result is None or self.request is None: + return + to_read = min(self.remaining, 65536) data = self.read_data(self.start, to_read) assert len(data) <= to_read @@ -403,7 +444,7 @@ class _ReadRangeProducer: def read_range( request: Request, read_data: ReadData, share_length: int -) -> Union[Deferred, bytes]: +) -> Union[Deferred[bytes], bytes]: """ Read an optional ``Range`` header, reads data appropriately via the given callable, writes the data to the request. 
@@ -440,6 +481,8 @@ def read_range( raise _HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE) offset, end = range_header.ranges[0] + assert end is not None # should've exited in block above this if so + # If we're being ask to read beyond the length of the share, just read # less: end = min(end, share_length) @@ -458,7 +501,7 @@ def read_range( ContentRange("bytes", offset, end).to_header(), ) - d = Deferred() + d: Deferred[bytes] = Deferred() request.registerProducer( _ReadRangeProducer( request, read_data_with_error_handling, d, offset, end - offset @@ -468,6 +511,25 @@ def read_range( return d +def _add_error_handling(app: Klein): + """Add exception handlers to a Klein app.""" + + @app.handle_errors(_HTTPError) + def _http_error(_, request, failure): + """Handle ``_HTTPError`` exceptions.""" + request.setResponseCode(failure.value.code) + if failure.value.body is not None: + return failure.value.body + else: + return b"" + + @app.handle_errors(CDDLValidationError) + def _cddl_validation_error(_, request, failure): + """Handle CDDL validation errors.""" + request.setResponseCode(http.BAD_REQUEST) + return str(failure.value).encode("utf-8") + + class HTTPServer(object): """ A HTTP interface to the storage server. @@ -475,18 +537,7 @@ class HTTPServer(object): _app = Klein() _app.url_map.converters["storage_index"] = StorageIndexConverter - - @_app.handle_errors(_HTTPError) - def _http_error(self, request, failure): - """Handle ``_HTTPError`` exceptions.""" - request.setResponseCode(failure.value.code) - return b"" - - @_app.handle_errors(CDDLValidationError) - def _cddl_validation_error(self, request, failure): - """Handle CDDL validation errors.""" - request.setResponseCode(http.BAD_REQUEST) - return str(failure.value).encode("utf-8") + _add_error_handling(_app) def __init__( self, @@ -592,7 +643,26 @@ class HTTPServer(object): @_authorized_route(_app, set(), "/storage/v1/version", methods=["GET"]) def version(self, request, authorization): """Return version information.""" - return self._send_encoded(request, self._storage_server.get_version()) + return self._send_encoded(request, self._get_version()) + + def _get_version(self) -> dict[bytes, Any]: + """ + Get the HTTP version of the storage server's version response. + + This differs from the Foolscap version by omitting certain obsolete + fields. + """ + v = self._storage_server.get_version() + v1_identifier = b"http://allmydata.org/tahoe/protocols/storage/v1" + v1 = v[v1_identifier] + return { + v1_identifier: { + b"maximum-immutable-share-size": v1[b"maximum-immutable-share-size"], + b"maximum-mutable-share-size": v1[b"maximum-mutable-share-size"], + b"available-space": v1[b"available-space"], + }, + b"application-version": v[b"application-version"], + } ##### Immutable APIs ##### @@ -731,6 +801,7 @@ class HTTPServer(object): ) def read_share_chunk(self, request, authorization, storage_index, share_number): """Read a chunk for an already uploaded immutable.""" + request.setHeader("content-type", "application/octet-stream") try: bucket = self._storage_server.get_buckets(storage_index)[share_number] except KeyError: @@ -779,7 +850,9 @@ class HTTPServer(object): # The reason can be a string with explanation, so in theory it could be # longish? 
info = await self._read_encoded( - request, _SCHEMAS["advise_corrupt_share"], max_size=32768, + request, + _SCHEMAS["advise_corrupt_share"], + max_size=32768, ) bucket.advise_corrupt_share(info["reason"].encode("utf-8")) return b"" @@ -834,6 +907,7 @@ class HTTPServer(object): ) def read_mutable_chunk(self, request, authorization, storage_index, share_number): """Read a chunk from a mutable.""" + request.setHeader("content-type", "application/octet-stream") try: share_length = self._storage_server.get_mutable_share_length( @@ -926,13 +1000,20 @@ class _TLSEndpointWrapper(object): def build_nurl( - hostname: str, port: int, swissnum: str, certificate: CryptoCertificate + hostname: str, + port: int, + swissnum: str, + certificate: CryptoCertificate, + subscheme: Optional[str] = None, ) -> DecodedURL: """ Construct a HTTPS NURL, given the hostname, port, server swissnum, and x509 certificate for the server. Clients can then connect to the server using this NURL. """ + scheme = "pb" + if subscheme is not None: + scheme = f"{scheme}+{subscheme}" return DecodedURL().replace( fragment="v=1", # how we know this NURL is HTTP-based (i.e. not Foolscap) host=hostname, @@ -944,7 +1025,7 @@ def build_nurl( "ascii", ), ), - scheme="pb", + scheme=scheme, ) @@ -954,7 +1035,7 @@ def listen_tls( endpoint: IStreamServerEndpoint, private_key_path: FilePath, cert_path: FilePath, -) -> Deferred[Tuple[DecodedURL, IListeningPort]]: +) -> Deferred[tuple[DecodedURL, IListeningPort]]: """ Start a HTTPS storage server on the given port, return the NURL and the listening port. diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index c056a7d28..c0d11abfd 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -173,7 +173,9 @@ class LeaseInfo(object): """ return attr.assoc( self, - _expiration_time=new_expire_time, + # MyPy is unhappy with this; long-term solution is likely switch to + # new @frozen attrs API, with type annotations. + _expiration_time=new_expire_time, # type: ignore[call-arg] ) def is_renew_secret(self, candidate_secret): diff --git a/src/allmydata/storage/lease_schema.py b/src/allmydata/storage/lease_schema.py index 7e604388e..ba7dc991a 100644 --- a/src/allmydata/storage/lease_schema.py +++ b/src/allmydata/storage/lease_schema.py @@ -2,19 +2,7 @@ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - -try: - from typing import Union -except ImportError: - pass +from typing import Union import attr @@ -68,7 +56,7 @@ class HashedLeaseSerializer(object): """ Hash a lease secret for storage. """ - return blake2b(secret, digest_size=32, encoder=RawEncoder()) + return blake2b(secret, digest_size=32, encoder=RawEncoder) @classmethod def _hash_lease_info(cls, lease_info): @@ -95,8 +83,7 @@ class HashedLeaseSerializer(object): cls._hash_secret, ) - def serialize(self, lease): - # type: (Union[LeaseInfo, HashedLeaseInfo]) -> bytes + def serialize(self, lease: Union[LeaseInfo, HashedLeaseInfo]) -> bytes: if isinstance(lease, LeaseInfo): # v2 of the immutable schema stores lease secrets hashed. If # we're given a LeaseInfo then it holds plaintext secrets. 
Hash diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 2bf99d74c..858b87b1f 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -2,8 +2,9 @@ Ported to Python 3. """ from __future__ import annotations + from future.utils import bytes_to_native_str -from typing import Dict, Tuple, Iterable +from typing import Iterable, Any import os, re @@ -54,7 +55,9 @@ class StorageServer(service.MultiService): """ Implement the business logic for the storage server. """ - name = 'storage' + # The type in Twisted for services is wrong in 22.10... + # https://github.com/twisted/twisted/issues/10135 + name = 'storage' # type: ignore[assignment] # only the tests change this to anything else LeaseCheckerClass = LeaseCheckingCrawler @@ -823,7 +826,7 @@ class FoolscapStorageServer(Referenceable): # type: ignore # warner/foolscap#78 self._server = storage_server # Canaries and disconnect markers for BucketWriters created via Foolscap: - self._bucket_writer_disconnect_markers = {} # type: Dict[BucketWriter,Tuple[IRemoteReference, object]] + self._bucket_writer_disconnect_markers : dict[BucketWriter, tuple[IRemoteReference, Any]] = {} self._server.register_bucket_writer_close_handler(self._bucket_writer_closed) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index f980dab5e..ee5052e8d 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -33,8 +33,7 @@ Ported to Python 3. from __future__ import annotations from six import ensure_text - -from typing import Union, Any +from typing import Union, Callable, Any, Optional, cast from os import urandom import re import time @@ -45,6 +44,7 @@ import json import attr from hyperlink import DecodedURL +from twisted.web.client import HTTPConnectionPool from zope.interface import ( Attribute, Interface, @@ -54,6 +54,7 @@ from twisted.python.failure import Failure from twisted.web import http from twisted.internet.task import LoopingCall from twisted.internet import defer, reactor +from twisted.internet.interfaces import IReactorTime from twisted.application import service from twisted.plugin import ( getPlugins, @@ -71,6 +72,7 @@ from allmydata.interfaces import ( IServer, IStorageServer, IFoolscapStoragePlugin, + VersionMessage ) from allmydata.grid_manager import ( create_grid_manager_verifier, SignedCertificate @@ -78,17 +80,19 @@ from allmydata.grid_manager import ( from allmydata.crypto import ( ed25519, ) +from allmydata.util.tor_provider import _Provider as TorProvider from allmydata.util import log, base32, connection_status from allmydata.util.assertutil import precondition from allmydata.util.observer import ObserverList from allmydata.util.rrefutil import add_version_to_remote_reference from allmydata.util.hashutil import permute_server_hash from allmydata.util.dictutil import BytesKeyDict, UnicodeKeyDict -from allmydata.util.deferredutil import async_to_deferred +from allmydata.util.deferredutil import async_to_deferred, race from allmydata.storage.http_client import ( StorageClient, StorageClientImmutables, StorageClientGeneral, ClientException as HTTPClientException, StorageClientMutables, - ReadVector, TestWriteVectors, WriteVector, TestVector, ClientException + ReadVector, TestWriteVectors, WriteVector, TestVector, ClientException, + StorageClientFactory ) from .node import _Config @@ -203,8 +207,13 @@ class StorageFarmBroker(service.MultiService): tub_maker, node_config: _Config, storage_client_config=None, + 
default_connection_handlers=None,
+        tor_provider: Optional[TorProvider]=None,
     ):
         service.MultiService.__init__(self)
+        if default_connection_handlers is None:
+            default_connection_handlers = {"tcp": "tcp"}
+
         assert permute_peers  # False not implemented yet
         self.permute_peers = permute_peers
         self._tub_maker = tub_maker
@@ -224,6 +233,8 @@ class StorageFarmBroker(service.MultiService):
         self.introducer_client = None
         self._threshold_listeners : list[tuple[float,defer.Deferred[Any]]]= []  # tuples of (threshold, Deferred)
         self._connected_high_water_mark = 0
+        self._tor_provider = tor_provider
+        self._default_connection_handlers = default_connection_handlers
 
     @log_call(action_type=u"storage-client:broker:set-static-servers")
     def set_static_servers(self, servers):
@@ -316,6 +327,8 @@ class StorageFarmBroker(service.MultiService):
             server_id,
             server["ann"],
             grid_manager_verifier=gm_verifier,
+            default_connection_handlers=self._default_connection_handlers,
+            tor_provider=self._tor_provider
         )
         s.on_status_changed(lambda _: self._got_connection())
         return s
@@ -1020,6 +1033,26 @@ class NativeStorageServer(service.MultiService):
         self._reconnector.reset()
 
 
+@async_to_deferred
+async def _pick_a_http_server(
+        reactor,
+        nurls: list[DecodedURL],
+        request: Callable[[Any, DecodedURL], defer.Deferred[Any]]
+) -> DecodedURL:
+    """Pick the first server we successfully send a request to.
+
+    Fires with the ``DecodedURL`` of the first server to answer the request
+    successfully; if no server can be reached, the returned ``Deferred``
+    fails instead.
+    """
+    queries = race([
+        request(reactor, nurl).addCallback(lambda _, nurl=nurl: nurl)
+        for nurl in nurls
+    ])
+
+    _, nurl = await queries
+    return nurl
+
+
 @implementer(IServer)
 class HTTPNativeStorageServer(service.MultiService):
     """
     Like ``NativeStorageServer``, but for HTTP clients.
 
     The notion of being "connected" is less meaningful for HTTP; we just poll
     occasionally, and if we've succeeded at last poll, we assume we're
     "connected".
     """
 
-    def __init__(self, server_id: bytes, announcement, reactor=reactor, grid_manager_verifier=None):
+    def __init__(self, server_id: bytes, announcement, default_connection_handlers: dict[str,str], reactor=reactor, grid_manager_verifier=None, tor_provider: Optional[TorProvider]=None):
         service.MultiService.__init__(self)
         assert isinstance(server_id, bytes)
         self._server_id = server_id
         self.announcement = announcement
         self._on_status_changed = ObserverList()
         self._reactor = reactor
         self._grid_manager_verifier = grid_manager_verifier
+        self._storage_client_factory = StorageClientFactory(
+            default_connection_handlers, tor_provider
+        )
+
         furl = announcement["anonymous-storage-FURL"].encode("utf-8")
         (
             self._nickname,
             self._permutation_seed,
             self._tubid,
             self._short_description,
             self._long_description
         ) = _parse_announcement(server_id, furl, announcement)
-        # TODO need some way to do equivalent of Happy Eyeballs for multiple NURLs?
- # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3935 - nurl = DecodedURL.from_text(announcement[ANONYMOUS_STORAGE_NURLS][0]) - self._istorage_server = _HTTPStorageServer.from_http_client( - StorageClient.from_nurl(nurl, reactor) - ) + self._nurls = [ + DecodedURL.from_text(u) + for u in announcement[ANONYMOUS_STORAGE_NURLS] + ] + self._istorage_server : Optional[_HTTPStorageServer] = None self._connection_status = connection_status.ConnectionStatus.unstarted() self._version = None self._last_connect_time = None - self._connecting_deferred = None + self._connecting_deferred : Optional[defer.Deferred[object]]= None def get_permutation_seed(self): return self._permutation_seed @@ -1168,19 +1204,85 @@ class HTTPNativeStorageServer(service.MultiService): def try_to_connect(self): self._connect() - def _connect(self): - result = self._istorage_server.get_version() + def _connect(self) -> defer.Deferred[object]: + """ + Try to connect to a working storage server. - def remove_connecting_deferred(result): + If called while a previous ``_connect()`` is already running, it will + just return the same ``Deferred``. + + ``LoopingCall.stop()`` doesn't cancel ``Deferred``s, unfortunately: + https://github.com/twisted/twisted/issues/11814. Thus we want to store + the ``Deferred`` so we can cancel it when necessary. + + We also want to return it so that loop iterations take it into account, + and a new iteration doesn't start while we're in the middle of the + previous one. + """ + # Conceivably try_to_connect() was called on this before, in which case + # we already are in the middle of connecting. So in that case just + # return whatever is in progress: + if self._connecting_deferred is not None: + return self._connecting_deferred + + def done(_): self._connecting_deferred = None - return result + connecting = self._pick_server_and_get_version() # Set a short timeout since we're relying on this for server liveness. - self._connecting_deferred = result.addTimeout(5, self._reactor).addBoth( - remove_connecting_deferred).addCallbacks( - self._got_version, - self._failed_to_connect - ) + connecting = connecting.addTimeout(5, self._reactor).addCallbacks( + self._got_version, self._failed_to_connect + ).addBoth(done) + self._connecting_deferred = connecting + return connecting + + @async_to_deferred + async def _pick_server_and_get_version(self): + """ + Minimal implementation of connection logic: pick a server, get its + version. This doesn't deal with errors much, so as to minimize + statefulness. It does change ``self._istorage_server``, so possibly + more refactoring would be useful to remove even that much statefulness. + """ + async def get_istorage_server() -> _HTTPStorageServer: + if self._istorage_server is not None: + return self._istorage_server + + # We haven't selected a server yet, so let's do so. + + # TODO This is somewhat inefficient on startup: it takes two successful + # version() calls before we are live talking to a server, it could only + # be one. See https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3992 + + @async_to_deferred + async def request(reactor, nurl: DecodedURL): + # Since we're just using this one off to check if the NURL + # works, no need for persistent pool or other fanciness. 
+ pool = HTTPConnectionPool(reactor, persistent=False) + pool.retryAutomatically = False + storage_client = await self._storage_client_factory.create_storage_client( + nurl, reactor, pool + ) + return await StorageClientGeneral(storage_client).get_version() + + nurl = await _pick_a_http_server(reactor, self._nurls, request) + + # If we've gotten this far, we've found a working NURL. + storage_client = await self._storage_client_factory.create_storage_client( + nurl, cast(IReactorTime, reactor), None + ) + self._istorage_server = _HTTPStorageServer.from_http_client(storage_client) + return self._istorage_server + + try: + storage_server = await get_istorage_server() + + # Get the version from the remote server. + version = await storage_server.get_version() + return version + except Exception as e: + log.msg(f"Failed to connect to a HTTP storage server: {e}", level=log.CURIOUS) + raise def stopService(self): if self._connecting_deferred is not None: @@ -1190,6 +1292,11 @@ class HTTPNativeStorageServer(service.MultiService): if self._lc.running: self._lc.stop() self._failed_to_connect("shut down") + + if self._istorage_server is not None: + client_shutting_down = self._istorage_server._http_client.shutdown() + result.addCallback(lambda _: client_shutting_down) + return result @@ -1355,7 +1462,7 @@ class _HTTPBucketWriter(object): return self.finished -def _ignore_404(failure: Failure) -> Union[Failure, None]: +def _ignore_404(failure: Failure) -> Optional[Failure]: """ Useful for advise_corrupt_share(), since it swallows unknown share numbers in Foolscap. @@ -1397,13 +1504,13 @@ class _HTTPStorageServer(object): _http_client = attr.ib(type=StorageClient) @staticmethod - def from_http_client(http_client): # type: (StorageClient) -> _HTTPStorageServer + def from_http_client(http_client: StorageClient) -> _HTTPStorageServer: """ Create an ``IStorageServer`` from a HTTP ``StorageClient``. """ return _HTTPStorageServer(http_client=http_client) - def get_version(self): + def get_version(self) -> defer.Deferred[VersionMessage]: return StorageClientGeneral(self._http_client).get_version() @defer.inlineCallbacks diff --git a/src/allmydata/test/cli/test_create.py b/src/allmydata/test/cli/test_create.py index 609888fb3..1d1576082 100644 --- a/src/allmydata/test/cli/test_create.py +++ b/src/allmydata/test/cli/test_create.py @@ -1,21 +1,11 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations import os -try: - from typing import Any, List, Tuple -except ImportError: - pass +from typing import Any from twisted.trial import unittest from twisted.internet import defer, reactor @@ -356,8 +346,7 @@ class Config(unittest.TestCase): self.assertIn("is not empty", err) self.assertIn("To avoid clobbering anything, I am going to quit now", err) -def fake_config(testcase, module, result): - # type: (unittest.TestCase, Any, Any) -> List[Tuple] +def fake_config(testcase: unittest.TestCase, module: Any, result: Any) -> list[tuple]: """ Monkey-patch a fake configuration function into the given module. 
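
(Aside: the ``_pick_a_http_server`` helper added earlier in this patch leans entirely on ``race``. Its observable behavior, grounded in this diff and in the ``RaceTests`` added later in the patch, can be sketched with plain in-memory ``Deferred``s standing in for real NURL probes.)

# Illustration only: race() fires with (index, result) of the first input
# Deferred to succeed and cancels the losers.
from twisted.internet.defer import Deferred

from allmydata.util.deferredutil import race

slow: Deferred[str] = Deferred()
fast: Deferred[str] = Deferred()
winner = race([slow, fast])
winner.addCallback(print)

fast.callback("nurl-2 answered first")  # prints: (1, 'nurl-2 answered first')
# `slow` has been cancelled at this point.  Had every input failed instead,
# `winner` would fail with MultiFailure wrapping all of the failures.
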
diff --git a/src/allmydata/test/cli/test_invite.py b/src/allmydata/test/cli/test_invite.py index 07756eeed..1302e5970 100644 --- a/src/allmydata/test/cli/test_invite.py +++ b/src/allmydata/test/cli/test_invite.py @@ -8,7 +8,7 @@ import json import os from functools import partial from os.path import join -from typing import Awaitable, Callable, Optional, Sequence, TypeVar, Union +from typing import Callable, Optional, Sequence, TypeVar, Union, Coroutine, Any, Tuple, cast, Generator from twisted.internet import defer from twisted.trial import unittest @@ -19,7 +19,8 @@ from ...util.jsonbytes import dumps_bytes from ..common_util import run_cli from ..no_network import GridTestMixin from .common import CLITestMixin -from .wormholetesting import IWormhole, MemoryWormholeServer, TestingHelper, memory_server +from .wormholetesting import MemoryWormholeServer, TestingHelper, memory_server, IWormhole + # Logically: # JSONable = dict[str, Union[JSONable, None, int, float, str, list[JSONable]]] @@ -59,7 +60,7 @@ def make_simple_peer( server: MemoryWormholeServer, helper: TestingHelper, messages: Sequence[JSONable], -) -> Callable[[], Awaitable[IWormhole]]: +) -> Callable[[], Coroutine[defer.Deferred[IWormhole], Any, IWormhole]]: """ Make a wormhole peer that just sends the given messages. @@ -101,18 +102,24 @@ A = TypeVar("A") B = TypeVar("B") def concurrently( - client: Callable[[], Awaitable[A]], - server: Callable[[], Awaitable[B]], -) -> defer.Deferred[tuple[A, B]]: + client: Callable[[], Union[ + Coroutine[defer.Deferred[A], Any, A], + Generator[defer.Deferred[A], Any, A], + ]], + server: Callable[[], Union[ + Coroutine[defer.Deferred[B], Any, B], + Generator[defer.Deferred[B], Any, B], + ]], +) -> defer.Deferred[Tuple[A, B]]: """ Run two asynchronous functions concurrently and asynchronously return a tuple of both their results. """ - return defer.gatherResults([ + result = defer.gatherResults([ defer.Deferred.fromCoroutine(client()), defer.Deferred.fromCoroutine(server()), - ]) - + ]).addCallback(tuple) # type: ignore + return cast(defer.Deferred[Tuple[A, B]], result) class Join(GridTestMixin, CLITestMixin, unittest.TestCase): diff --git a/src/allmydata/test/cli/test_run.py b/src/allmydata/test/cli/test_run.py index e84f52096..2adcfea19 100644 --- a/src/allmydata/test/cli/test_run.py +++ b/src/allmydata/test/cli/test_run.py @@ -1,16 +1,8 @@ """ Tests for ``allmydata.scripts.tahoe_run``. - -Ported to Python 3. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations import re from six.moves import ( @@ -31,6 +23,12 @@ from twisted.python.filepath import ( from twisted.internet.testing import ( MemoryReactor, ) +from twisted.python.failure import ( + Failure, +) +from twisted.internet.error import ( + ConnectionDone, +) from twisted.internet.test.modulehelpers import ( AlternateReactor, ) @@ -147,6 +145,91 @@ class DaemonizeTheRealServiceTests(SyncTestCase): ) +class DaemonizeStopTests(SyncTestCase): + """ + Tests relating to stopping the daemon + """ + def setUp(self): + self.nodedir = FilePath(self.mktemp()) + self.nodedir.makedirs() + config = "" + self.nodedir.child("tahoe.cfg").setContent(config.encode("ascii")) + self.nodedir.child("tahoe-client.tac").touch() + + # arrange to know when reactor.stop() is called + self.reactor = MemoryReactor() + self.stop_calls = [] + + def record_stop(): + self.stop_calls.append(object()) + self.reactor.stop = record_stop + + super().setUp() + + def _make_daemon(self, extra_argv: list[str]) -> DaemonizeTheRealService: + """ + Create the daemonization service. + + :param extra_argv: Extra arguments to pass between ``run`` and the + node path. + """ + options = parse_options(["run"] + extra_argv + [self.nodedir.path]) + options.stdout = StringIO() + options.stderr = StringIO() + options.stdin = StringIO() + run_options = options.subOptions + return DaemonizeTheRealService( + "client", + self.nodedir.path, + run_options, + ) + + def _run_daemon(self) -> None: + """ + Simulate starting up the reactor so the daemon plugin can do its + stuff. + """ + # We happen to know that the service uses reactor.callWhenRunning + # to schedule all its work (though I couldn't tell you *why*). + # Make sure those scheduled calls happen. + waiting = self.reactor.whenRunningHooks[:] + del self.reactor.whenRunningHooks[:] + for f, a, k in waiting: + f(*a, **k) + + def _close_stdin(self) -> None: + """ + Simulate closing the daemon plugin's stdin. + """ + # there should be a single reader: our StandardIO process + # reader for stdin. Simulate it closing. + for r in self.reactor.getReaders(): + r.connectionLost(Failure(ConnectionDone())) + + def test_stop_on_stdin_close(self): + """ + We stop when stdin is closed. + """ + with AlternateReactor(self.reactor): + service = self._make_daemon([]) + service.startService() + self._run_daemon() + self._close_stdin() + self.assertEqual(len(self.stop_calls), 1) + + def test_allow_stdin_close(self): + """ + If --allow-stdin-close is specified then closing stdin doesn't + stop the process + """ + with AlternateReactor(self.reactor): + service = self._make_daemon(["--allow-stdin-close"]) + service.startService() + self._run_daemon() + self._close_stdin() + self.assertEqual(self.stop_calls, []) + + class RunTests(SyncTestCase): """ Tests for ``run``. 
diff --git a/src/allmydata/test/cli/wormholetesting.py b/src/allmydata/test/cli/wormholetesting.py index 4775ca5ef..3bcad1ebf 100644 --- a/src/allmydata/test/cli/wormholetesting.py +++ b/src/allmydata/test/cli/wormholetesting.py @@ -32,8 +32,9 @@ For example:: from __future__ import annotations -from typing import Iterator, Optional, List, Tuple -from collections.abc import Awaitable +__all__ = ['MemoryWormholeServer', 'TestingHelper', 'memory_server', 'IWormhole'] + +from typing import Iterator, Optional, List, Tuple, Any, TextIO from inspect import getfullargspec from itertools import count from sys import stderr @@ -62,22 +63,23 @@ class MemoryWormholeServer(object): specific application id and relay URL combination. """ _apps: dict[ApplicationKey, _WormholeApp] = field(default=Factory(dict)) - _waiters: dict[ApplicationKey, Deferred] = field(default=Factory(dict)) + _waiters: dict[ApplicationKey, Deferred[IWormhole]] = field(default=Factory(dict)) def create( self, - appid, - relay_url, - reactor, - versions={}, - delegate=None, - journal=None, - tor=None, - timing=None, - stderr=stderr, - _eventual_queue=None, - _enable_dilate=False, - ): + appid: str, + relay_url: str, + reactor: Any, + # Unfortunately we need a mutable default to match the real API + versions: Any={}, # noqa: B006 + delegate: Optional[Any]=None, + journal: Optional[Any]=None, + tor: Optional[Any]=None, + timing: Optional[Any]=None, + stderr: TextIO=stderr, + _eventual_queue: Optional[Any]=None, + _enable_dilate: bool=False, + ) -> _MemoryWormhole: """ Create a wormhole. It will be able to connect to other wormholes created by this instance (and constrained by the normal appid/relay_url @@ -128,13 +130,13 @@ class TestingHelper(object): key = (relay_url, appid) if key in self._server._waiters: raise ValueError(f"There is already a waiter for {key}") - d = Deferred() + d : Deferred[IWormhole] = Deferred() self._server._waiters[key] = d wormhole = await d return wormhole -def _verify(): +def _verify() -> None: """ Roughly confirm that the in-memory wormhole creation function matches the interface of the real implementation. @@ -145,7 +147,13 @@ def _verify(): b = getfullargspec(MemoryWormholeServer.create) # I know it has a `self` argument at the beginning. That's okay. b = b._replace(args=b.args[1:]) - assert a == b, "{} != {}".format(a, b) + + # Just compare the same information to check function signature + assert a.varkw == b.varkw + assert a.args == b.args + assert a.varargs == b.varargs + assert a.kwonlydefaults == b.kwonlydefaults + assert a.defaults == b.defaults _verify() @@ -158,7 +166,7 @@ class _WormholeApp(object): appid/relay_url scope. """ wormholes: dict[WormholeCode, IWormhole] = field(default=Factory(dict)) - _waiting: dict[WormholeCode, List[Deferred]] = field(default=Factory(dict)) + _waiting: dict[WormholeCode, List[Deferred[_MemoryWormhole]]] = field(default=Factory(dict)) _counter: Iterator[int] = field(default=Factory(count)) def allocate_code(self, wormhole: IWormhole, code: Optional[WormholeCode]) -> WormholeCode: @@ -184,13 +192,13 @@ class _WormholeApp(object): return code - def wait_for_wormhole(self, code: WormholeCode) -> Awaitable[_MemoryWormhole]: + def wait_for_wormhole(self, code: WormholeCode) -> Deferred[_MemoryWormhole]: """ Return a ``Deferred`` which fires with the next wormhole to be associated with the given code. This is used to let the first end of a wormhole rendezvous with the second end. 
""" - d = Deferred() + d : Deferred[_MemoryWormhole] = Deferred() self._waiting.setdefault(code, []).append(d) return d @@ -234,8 +242,8 @@ class _MemoryWormhole(object): _view: _WormholeServerView _code: Optional[WormholeCode] = None - _payload: DeferredQueue = field(default=Factory(DeferredQueue)) - _waiting_for_code: list[Deferred] = field(default=Factory(list)) + _payload: DeferredQueue[WormholeMessage] = field(default=Factory(DeferredQueue)) + _waiting_for_code: list[Deferred[WormholeCode]] = field(default=Factory(list)) def allocate_code(self) -> None: if self._code is not None: @@ -257,12 +265,12 @@ class _MemoryWormhole(object): def when_code(self) -> Deferred[WormholeCode]: if self._code is None: - d = Deferred() + d : Deferred[WormholeCode] = Deferred() self._waiting_for_code.append(d) return d return succeed(self._code) - def get_welcome(self): + def get_welcome(self) -> Deferred[str]: return succeed("welcome") def send_message(self, payload: WormholeMessage) -> None: @@ -276,8 +284,8 @@ class _MemoryWormhole(object): ) d = self._view.wormhole_by_code(self._code, exclude=self) - def got_wormhole(wormhole): - msg = wormhole._payload.get() + def got_wormhole(wormhole: _MemoryWormhole) -> Deferred[WormholeMessage]: + msg: Deferred[WormholeMessage] = wormhole._payload.get() return msg d.addCallback(got_wormhole) diff --git a/src/allmydata/test/common_system.py b/src/allmydata/test/common_system.py index 3491d413d..cfb6c9f04 100644 --- a/src/allmydata/test/common_system.py +++ b/src/allmydata/test/common_system.py @@ -686,8 +686,8 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): def setUp(self): self._http_client_pools = [] - http_client.StorageClient.start_test_mode(self._got_new_http_connection_pool) - self.addCleanup(http_client.StorageClient.stop_test_mode) + http_client.StorageClientFactory.start_test_mode(self._got_new_http_connection_pool) + self.addCleanup(http_client.StorageClientFactory.stop_test_mode) self.port_assigner = SameProcessStreamEndpointAssigner() self.port_assigner.setUp() self.addCleanup(self.port_assigner.tearDown) @@ -819,8 +819,8 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): helper_furl = f.read() self.helper_furl = helper_furl - if self.numclients >= 4: - with open(os.path.join(basedirs[3], 'tahoe.cfg'), 'a+') as f: + if self.numclients >= 2: + with open(os.path.join(basedirs[1], 'tahoe.cfg'), 'a+') as f: f.write( "[client]\n" "helper.furl = {}\n".format(helper_furl) @@ -836,9 +836,9 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): log.msg("CONNECTED") # now find out where the web port was self.webish_url = self.clients[0].getServiceNamed("webish").getURL() - if self.numclients >=4: + if self.numclients >=2: # and the helper-using webport - self.helper_webish_url = self.clients[3].getServiceNamed("webish").getURL() + self.helper_webish_url = self.clients[1].getServiceNamed("webish").getURL() def _generate_config(self, which, basedir, force_foolscap=False): config = {} @@ -854,10 +854,10 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin): ("node", "tub.location"): allclients, # client 0 runs a webserver and a helper - # client 3 runs a webserver but no helper - ("node", "web.port"): {0, 3}, + # client 1 runs a webserver but no helper + ("node", "web.port"): {0, 1}, ("node", "timeout.keepalive"): {0}, - ("node", "timeout.disconnect"): {3}, + ("node", "timeout.disconnect"): {1}, ("helper", "enabled"): {0}, } diff --git a/src/allmydata/test/eliotutil.py b/src/allmydata/test/eliotutil.py 
index dd21f1e9d..bdc779f1d 100644 --- a/src/allmydata/test/eliotutil.py +++ b/src/allmydata/test/eliotutil.py @@ -3,18 +3,6 @@ Tools aimed at the interaction between tests and Eliot. Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -# Python 2 compatibility -# Can't use `builtins.str` because it's not JSON encodable: -# `exceptions.TypeError: is not JSON-encodeable` -from past.builtins import unicode as str -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 from six import ensure_text @@ -23,11 +11,7 @@ __all__ = [ "EliotLoggedRunTest", ] -try: - from typing import Callable -except ImportError: - pass - +from typing import Callable from functools import ( partial, wraps, @@ -147,8 +131,8 @@ class EliotLoggedRunTest(object): def with_logging( - test_id, # type: str - test_method, # type: Callable + test_id: str, + test_method: Callable, ): """ Decorate a test method with additional log-related behaviors. diff --git a/src/allmydata/test/mutable/test_version.py b/src/allmydata/test/mutable/test_version.py index 87050424b..c91c1d4f1 100644 --- a/src/allmydata/test/mutable/test_version.py +++ b/src/allmydata/test/mutable/test_version.py @@ -78,18 +78,21 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ fso.nodedirs = [os.path.dirname(abspath_expanduser_unicode(str(storedir))) for (i,ss,storedir) in self.iterate_servers()] - fso.stdout = StringIO() - fso.stderr = StringIO() + # This attribute isn't defined on FindSharesOptions but `find_shares()` + # definitely expects it... + fso.stdout = StringIO() # type: ignore[attr-defined] debug.find_shares(fso) - sharefiles = fso.stdout.getvalue().splitlines() + sharefiles = fso.stdout.getvalue().splitlines() # type: ignore[attr-defined] expected = self.nm.default_encoding_parameters["n"] self.assertThat(sharefiles, HasLength(expected)) + # This attribute isn't defined on DebugOptions but `dump_share()` + # definitely expects it... do = debug.DumpOptions() do["filename"] = sharefiles[0] - do.stdout = StringIO() + do.stdout = StringIO() # type: ignore[attr-defined] debug.dump_share(do) - output = do.stdout.getvalue() + output = do.stdout.getvalue() # type: ignore[attr-defined] lines = set(output.splitlines()) self.assertTrue("Mutable slot found:" in lines, output) self.assertTrue(" share_type: MDMF" in lines, output) @@ -104,10 +107,12 @@ class Version(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin, \ self.assertTrue(" verify-cap: %s" % vcap in lines, output) cso = debug.CatalogSharesOptions() cso.nodedirs = fso.nodedirs - cso.stdout = StringIO() - cso.stderr = StringIO() + # Definitely not options on CatalogSharesOptions, but the code does use + # stdout and stderr... 
+ cso.stdout = StringIO() # type: ignore[attr-defined] + cso.stderr = StringIO() # type: ignore[attr-defined] debug.catalog_shares(cso) - shares = cso.stdout.getvalue().splitlines() + shares = cso.stdout.getvalue().splitlines() # type: ignore[attr-defined] oneshare = shares[0] # all shares should be MDMF self.failIf(oneshare.startswith("UNKNOWN"), oneshare) self.assertTrue(oneshare.startswith("MDMF"), oneshare) diff --git a/src/allmydata/test/no_network.py b/src/allmydata/test/no_network.py index 66748e4b1..e3b57fb95 100644 --- a/src/allmydata/test/no_network.py +++ b/src/allmydata/test/no_network.py @@ -1,34 +1,23 @@ """ -Ported to Python 3. +This contains a test harness that creates a full Tahoe grid in a single +process (actually in a single MultiService) which does not use the network. +It does not use an Introducer, and there are no foolscap Tubs. Each storage +server puts real shares on disk, but is accessed through loopback +RemoteReferences instead of over serialized SSL. It is not as complete as +the common.SystemTestMixin framework (which does use the network), but +should be considerably faster: on my laptop, it takes 50-80ms to start up, +whereas SystemTestMixin takes close to 2s. + +This should be useful for tests which want to examine and/or manipulate the +uploaded shares, checker/verifier/repairer tests, etc. The clients have no +Tubs, so it is not useful for tests that involve a Helper. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -# This contains a test harness that creates a full Tahoe grid in a single -# process (actually in a single MultiService) which does not use the network. -# It does not use an Introducer, and there are no foolscap Tubs. Each storage -# server puts real shares on disk, but is accessed through loopback -# RemoteReferences instead of over serialized SSL. It is not as complete as -# the common.SystemTestMixin framework (which does use the network), but -# should be considerably faster: on my laptop, it takes 50-80ms to start up, -# whereas SystemTestMixin takes close to 2s. +from __future__ import annotations -# This should be useful for tests which want to examine and/or manipulate the -# uploaded shares, checker/verifier/repairer tests, etc. The clients have no -# Tubs, so it is not useful for tests that involve a Helper. - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 -from past.builtins import unicode from six import ensure_text -try: - from typing import Dict, Callable -except ImportError: - pass +from typing import Callable import os from base64 import b32encode @@ -251,7 +240,7 @@ def create_no_network_client(basedir): :return: a Deferred yielding an instance of _Client subclass which does no actual networking but has the same API. """ - basedir = abspath_expanduser_unicode(unicode(basedir)) + basedir = abspath_expanduser_unicode(str(basedir)) fileutil.make_dirs(os.path.join(basedir, "private"), 0o700) from allmydata.client import read_config @@ -487,7 +476,7 @@ class GridTestMixin(object): ]) def set_up_grid(self, num_clients=1, num_servers=10, - client_config_hooks={}, oneshare=False): + client_config_hooks=None, oneshare=False): """ Create a Tahoe-LAFS storage grid. 
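
(Aside for readers new to this harness, a hypothetical usage sketch: the test class and assertion below are invented for illustration, but ``set_up_grid()`` and ``iterate_servers()`` are real entry points of this module.)

# Illustration only; not a test that exists in the tree.
from twisted.trial import unittest

from allmydata.test.no_network import GridTestMixin

class ExampleGridTests(GridTestMixin, unittest.TestCase):
    def test_grid_comes_up(self):
        self.basedir = self.mktemp()  # set_up_grid() requires self.basedir
        self.set_up_grid(num_clients=1, num_servers=10)
        # Ten real StorageServers writing shares to disk, reached over
        # loopback rather than over Foolscap Tubs.
        servers = list(self.iterate_servers())
        self.assertEqual(len(servers), 10)
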
@@ -500,6 +489,8 @@ class GridTestMixin(object): :return: ``None`` """ + if client_config_hooks is None: + client_config_hooks = {} # self.basedir must be set port_assigner = SameProcessStreamEndpointAssigner() port_assigner.setUp() @@ -577,8 +568,7 @@ class GridTestMixin(object): pass return sorted(shares) - def copy_shares(self, uri): - # type: (bytes) -> Dict[bytes, bytes] + def copy_shares(self, uri: bytes) -> dict[bytes, bytes]: """ Read all of the share files for the given capability from the storage area of the storage servers created by ``set_up_grid``. @@ -630,8 +620,7 @@ class GridTestMixin(object): with open(i_sharefile, "wb") as f: f.write(corruptdata) - def corrupt_all_shares(self, uri, corruptor, debug=False): - # type: (bytes, Callable[[bytes, bool], bytes], bool) -> None + def corrupt_all_shares(self, uri: bytes, corruptor: Callable[[bytes, bool], bytes], debug: bool=False): """ Apply ``corruptor`` to the contents of all share files associated with a given capability and replace the share file contents with its result. diff --git a/src/allmydata/test/test_connection_status.py b/src/allmydata/test/test_connection_status.py index 2bd8bf6ab..da41f5a47 100644 --- a/src/allmydata/test/test_connection_status.py +++ b/src/allmydata/test/test_connection_status.py @@ -1,25 +1,46 @@ """ Tests for allmydata.util.connection_status. - -Port to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from future.utils import PY2 -if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations -import mock +from typing import Optional -from twisted.trial import unittest +from foolscap.reconnector import ReconnectionInfo, Reconnector +from foolscap.info import ConnectionInfo from ..util import connection_status +from .common import SyncTestCase -class Status(unittest.TestCase): - def test_hint_statuses(self): +def reconnector(info: ReconnectionInfo) -> Reconnector: + rc = Reconnector(None, None, (), {}) # type: ignore[no-untyped-call] + rc._reconnectionInfo = info + return rc + +def connection_info( + statuses: dict[str, str], + handlers: dict[str, str], + winningHint: Optional[str], + establishedAt: Optional[int], +) -> ConnectionInfo: + ci = ConnectionInfo() # type: ignore[no-untyped-call] + ci.connectorStatuses = statuses + ci.connectionHandlers = handlers + ci.winningHint = winningHint + ci.establishedAt = establishedAt + return ci + +def reconnection_info( + state: str, + connection_info: ConnectionInfo, +) -> ReconnectionInfo: + ri = ReconnectionInfo() # type: ignore[no-untyped-call] + ri.state = state + ri.connectionInfo = connection_info + return ri + +class Status(SyncTestCase): + def test_hint_statuses(self) -> None: ncs = connection_status._hint_statuses(["h2","h1"], {"h1": "hand1", "h4": "hand4"}, {"h1": "st1", "h2": "st2", @@ -27,17 +48,10 @@ class Status(unittest.TestCase): self.assertEqual(ncs, {"h1 via hand1": "st1", "h2": "st2"}) - def test_reconnector_connected(self): - ci = mock.Mock() - ci.connectorStatuses = {"h1": "st1"} - ci.connectionHandlers = {"h1": "hand1"} - ci.winningHint = "h1" - ci.establishedAt = 120 - ri = mock.Mock() - ri.state = "connected" - ri.connectionInfo = ci - rc = mock.Mock - rc.getReconnectionInfo = mock.Mock(return_value=ri) + def test_reconnector_connected(self) -> None: + ci = 
connection_info({"h1": "st1"}, {"h1": "hand1"}, "h1", 120) + ri = reconnection_info("connected", ci) + rc = reconnector(ri) cs = connection_status.from_foolscap_reconnector(rc, 123) self.assertEqual(cs.connected, True) self.assertEqual(cs.summary, "Connected to h1 via hand1") @@ -45,17 +59,10 @@ class Status(unittest.TestCase): self.assertEqual(cs.last_connection_time, 120) self.assertEqual(cs.last_received_time, 123) - def test_reconnector_connected_others(self): - ci = mock.Mock() - ci.connectorStatuses = {"h1": "st1", "h2": "st2"} - ci.connectionHandlers = {"h1": "hand1"} - ci.winningHint = "h1" - ci.establishedAt = 120 - ri = mock.Mock() - ri.state = "connected" - ri.connectionInfo = ci - rc = mock.Mock - rc.getReconnectionInfo = mock.Mock(return_value=ri) + def test_reconnector_connected_others(self) -> None: + ci = connection_info({"h1": "st1", "h2": "st2"}, {"h1": "hand1"}, "h1", 120) + ri = reconnection_info("connected", ci) + rc = reconnector(ri) cs = connection_status.from_foolscap_reconnector(rc, 123) self.assertEqual(cs.connected, True) self.assertEqual(cs.summary, "Connected to h1 via hand1") @@ -63,18 +70,11 @@ class Status(unittest.TestCase): self.assertEqual(cs.last_connection_time, 120) self.assertEqual(cs.last_received_time, 123) - def test_reconnector_connected_listener(self): - ci = mock.Mock() - ci.connectorStatuses = {"h1": "st1", "h2": "st2"} - ci.connectionHandlers = {"h1": "hand1"} + def test_reconnector_connected_listener(self) -> None: + ci = connection_info({"h1": "st1", "h2": "st2"}, {"h1": "hand1"}, None, 120) ci.listenerStatus = ("listener1", "successful") - ci.winningHint = None - ci.establishedAt = 120 - ri = mock.Mock() - ri.state = "connected" - ri.connectionInfo = ci - rc = mock.Mock - rc.getReconnectionInfo = mock.Mock(return_value=ri) + ri = reconnection_info("connected", ci) + rc = reconnector(ri) cs = connection_status.from_foolscap_reconnector(rc, 123) self.assertEqual(cs.connected, True) self.assertEqual(cs.summary, "Connected via listener (listener1)") @@ -83,15 +83,10 @@ class Status(unittest.TestCase): self.assertEqual(cs.last_connection_time, 120) self.assertEqual(cs.last_received_time, 123) - def test_reconnector_connecting(self): - ci = mock.Mock() - ci.connectorStatuses = {"h1": "st1", "h2": "st2"} - ci.connectionHandlers = {"h1": "hand1"} - ri = mock.Mock() - ri.state = "connecting" - ri.connectionInfo = ci - rc = mock.Mock - rc.getReconnectionInfo = mock.Mock(return_value=ri) + def test_reconnector_connecting(self) -> None: + ci = connection_info({"h1": "st1", "h2": "st2"}, {"h1": "hand1"}, None, None) + ri = reconnection_info("connecting", ci) + rc = reconnector(ri) cs = connection_status.from_foolscap_reconnector(rc, 123) self.assertEqual(cs.connected, False) self.assertEqual(cs.summary, "Trying to connect") @@ -100,19 +95,13 @@ class Status(unittest.TestCase): self.assertEqual(cs.last_connection_time, None) self.assertEqual(cs.last_received_time, 123) - def test_reconnector_waiting(self): - ci = mock.Mock() - ci.connectorStatuses = {"h1": "st1", "h2": "st2"} - ci.connectionHandlers = {"h1": "hand1"} - ri = mock.Mock() - ri.state = "waiting" + def test_reconnector_waiting(self) -> None: + ci = connection_info({"h1": "st1", "h2": "st2"}, {"h1": "hand1"}, None, None) + ri = reconnection_info("waiting", ci) ri.lastAttempt = 10 ri.nextAttempt = 20 - ri.connectionInfo = ci - rc = mock.Mock - rc.getReconnectionInfo = mock.Mock(return_value=ri) - with mock.patch("time.time", return_value=12): - cs = 
connection_status.from_foolscap_reconnector(rc, 5)
+        rc = reconnector(ri)
+        cs = connection_status.from_foolscap_reconnector(rc, 5, time=lambda: 12)
         self.assertEqual(cs.connected, False)
         self.assertEqual(cs.summary,
                          "Reconnecting in 8 seconds (last attempt 2s ago)")
diff --git a/src/allmydata/test/test_consumer.py b/src/allmydata/test/test_consumer.py
index 234fc2594..ee1908ba7 100644
--- a/src/allmydata/test/test_consumer.py
+++ b/src/allmydata/test/test_consumer.py
@@ -39,6 +39,12 @@ class Producer(object):
         self.consumer = consumer
         self.done = False
 
+    def stopProducing(self):
+        pass
+
+    def pauseProducing(self):
+        pass
+
     def resumeProducing(self):
         """Kick off streaming."""
         self.iterate()
diff --git a/src/allmydata/test/test_deferredutil.py b/src/allmydata/test/test_deferredutil.py
index a37dfdd6f..34358d0c8 100644
--- a/src/allmydata/test/test_deferredutil.py
+++ b/src/allmydata/test/test_deferredutil.py
@@ -1,23 +1,18 @@
 """
 Tests for allmydata.util.deferredutil.
-
-Ported to Python 3.
 """
-from __future__ import unicode_literals
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from future.utils import PY2
-if PY2:
-    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min  # noqa: F401
+from __future__ import annotations
 
 from twisted.trial import unittest
 from twisted.internet import defer, reactor
+from twisted.internet.defer import Deferred
 from twisted.python.failure import Failure
+from hypothesis.strategies import integers
+from hypothesis import given
 
 from allmydata.util import deferredutil
+from allmydata.util.deferredutil import race, MultiFailure
 
 
 class DeferredUtilTests(unittest.TestCase, deferredutil.WaitForDelayedCallsMixin):
@@ -157,3 +152,148 @@ class AsyncToDeferred(unittest.TestCase):
         result = f(1, 0)
         self.assertIsInstance(self.failureResultOf(result).value, ZeroDivisionError)
+
+
+
+def _setupRaceState(numDeferreds: int) -> tuple[list[int], list[Deferred[object]]]:
+    """
+    Create a list of Deferreds and a corresponding list of integers
+    tracking how many times each Deferred has been cancelled.  Without
+    additional steps the Deferreds will never fire.
+    """
+    cancelledState = [0] * numDeferreds
+
+    ds: list[Deferred[object]] = []
+    for n in range(numDeferreds):
+
+        def cancel(d: Deferred, n: int = n) -> None:
+            cancelledState[n] += 1
+
+        ds.append(Deferred(canceller=cancel))
+
+    return cancelledState, ds
+
+
+class RaceTests(unittest.SynchronousTestCase):
+    """
+    Tests for L{race}.
+    """
+
+    @given(
+        beforeWinner=integers(min_value=0, max_value=3),
+        afterWinner=integers(min_value=0, max_value=3),
+    )
+    def test_success(self, beforeWinner: int, afterWinner: int) -> None:
+        """
+        When one of the L{Deferred}s passed to L{race} fires successfully,
+        the L{Deferred} returned by L{race} fires with the index of that
+        L{Deferred} and its result and cancels the rest of the L{Deferred}s.
+
+        @param beforeWinner: A randomly selected number of Deferreds to
+            appear before the "winning" Deferred in the list passed in.
+
+        @param afterWinner: A randomly selected number of Deferreds to
+            appear after the "winning" Deferred in the list passed in.
+        """
+        cancelledState, ds = _setupRaceState(beforeWinner + 1 + afterWinner)
+
+        raceResult = race(ds)
+        expected = object()
+        ds[beforeWinner].callback(expected)
+
+        # The result should be the index and result of the only Deferred that
+        # fired.
+        self.assertEqual(
+            self.successResultOf(raceResult),
+            (beforeWinner, expected),
+        )
+        # All Deferreds except the winner should have been cancelled once.
+        expectedCancelledState = [1] * beforeWinner + [0] + [1] * afterWinner
+        self.assertEqual(
+            cancelledState,
+            expectedCancelledState,
+        )
+
+    @given(
+        beforeWinner=integers(min_value=0, max_value=3),
+        afterWinner=integers(min_value=0, max_value=3),
+    )
+    def test_failure(self, beforeWinner: int, afterWinner: int) -> None:
+        """
+        When all of the L{Deferred}s passed to L{race} fire with failures,
+        the L{Deferred} returned by L{race} fires with L{MultiFailure}
+        wrapping all of their failures.
+
+        @param beforeWinner: A randomly selected number of Deferreds to
+            appear before the "winning" Deferred in the list passed in.
+
+        @param afterWinner: A randomly selected number of Deferreds to
+            appear after the "winning" Deferred in the list passed in.
+        """
+        cancelledState, ds = _setupRaceState(beforeWinner + 1 + afterWinner)
+
+        failure = Failure(Exception("The test demands failures."))
+        raceResult = race(ds)
+        for d in ds:
+            d.errback(failure)
+
+        actualFailure = self.failureResultOf(raceResult, MultiFailure)
+        self.assertEqual(
+            actualFailure.value.failures,
+            [failure] * len(ds),
+        )
+        self.assertEqual(
+            cancelledState,
+            [0] * len(ds),
+        )
+
+    @given(
+        beforeWinner=integers(min_value=0, max_value=3),
+        afterWinner=integers(min_value=0, max_value=3),
+    )
+    def test_resultAfterCancel(self, beforeWinner: int, afterWinner: int) -> None:
+        """
+        If one of the Deferreds fires after it was cancelled its result
+        goes nowhere.  In particular, it does not cause any errors to be
+        logged.
+        """
+        # Ensure we have a Deferred to win and at least one other Deferred
+        # that can ignore cancellation.
+        ds: list[Deferred[None]] = [
+            Deferred() for n in range(beforeWinner + 2 + afterWinner)
+        ]
+
+        raceResult = race(ds)
+        ds[beforeWinner].callback(None)
+        ds[beforeWinner + 1].callback(None)
+
+        self.successResultOf(raceResult)
+        self.assertEqual(len(self.flushLoggedErrors()), 0)
+
+    def test_resultFromCancel(self) -> None:
+        """
+        If one of the input Deferreds has a cancel function that fires it
+        with success, nothing bad happens.
+        """
+        winner: Deferred[object] = Deferred()
+        ds: list[Deferred[object]] = [
+            winner,
+            Deferred(canceller=lambda d: d.callback(object())),
+        ]
+        expected = object()
+        raceResult = race(ds)
+        winner.callback(expected)
+
+        self.assertEqual(self.successResultOf(raceResult), (0, expected))
+
+    @given(
+        numDeferreds=integers(min_value=1, max_value=3),
+    )
+    def test_cancel(self, numDeferreds: int) -> None:
+        """
+        If the result of L{race} is cancelled then all of the L{Deferred}s
+        passed in are cancelled.
+        """
+        cancelledState, ds = _setupRaceState(numDeferreds)
+
+        raceResult = race(ds)
+        raceResult.cancel()
+
+        self.assertEqual(cancelledState, [1] * numDeferreds)
+        self.failureResultOf(raceResult, MultiFailure)
diff --git a/src/allmydata/test/test_download.py b/src/allmydata/test/test_download.py
index 85d89cde6..4d57fa828 100644
--- a/src/allmydata/test/test_download.py
+++ b/src/allmydata/test/test_download.py
@@ -1,23 +1,14 @@
 """
 Ported to Python 3.
""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals -from future.utils import PY2, bchr -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from future.utils import bchr # system-level upload+download roundtrip test, but using shares created from # a previous run. This asserts that the current code is capable of decoding # shares from a previous version. -try: - from typing import Any -except ImportError: - pass +from typing import Any import six import os @@ -1197,8 +1188,7 @@ class Corruption(_Base, unittest.TestCase): return d - def _corrupt_flip_all(self, ign, imm_uri, which): - # type: (Any, bytes, int) -> None + def _corrupt_flip_all(self, ign: Any, imm_uri: bytes, which: int) -> None: """ Flip the least significant bit at a given byte position in all share files for the given capability. diff --git a/src/allmydata/test/test_helper.py b/src/allmydata/test/test_helper.py index 933a2b591..b280f95df 100644 --- a/src/allmydata/test/test_helper.py +++ b/src/allmydata/test/test_helper.py @@ -1,14 +1,7 @@ """ Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations import os from struct import ( @@ -17,13 +10,8 @@ from struct import ( from functools import ( partial, ) -import attr -try: - from typing import List - from allmydata.introducer.client import IntroducerClient -except ImportError: - pass +import attr from twisted.internet import defer from twisted.trial import unittest @@ -35,6 +23,7 @@ from eliot.twisted import ( inline_callbacks, ) +from allmydata.introducer.client import IntroducerClient from allmydata.crypto import aes from allmydata.storage.server import ( si_b2a, @@ -132,7 +121,7 @@ class FakeCHKCheckerAndUEBFetcher(object): )) class FakeClient(service.MultiService): - introducer_clients = [] # type: List[IntroducerClient] + introducer_clients : list[IntroducerClient] = [] DEFAULT_ENCODING_PARAMETERS = {"k":25, "happy": 75, "n": 100, diff --git a/src/allmydata/test/test_iputil.py b/src/allmydata/test/test_iputil.py index 081c80ee3..26274830f 100644 --- a/src/allmydata/test/test_iputil.py +++ b/src/allmydata/test/test_iputil.py @@ -4,18 +4,13 @@ Tests for allmydata.util.iputil. Ported to Python 3. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2, native_str -if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations import os, socket import gc +from functools import wraps +from typing import TypeVar, Callable from testtools.matchers import ( MatchesAll, IsInstance, @@ -25,8 +20,6 @@ from testtools.matchers import ( from twisted.trial import unittest -from tenacity import retry, stop_after_attempt - from foolscap.api import Tub from allmydata.util import iputil, gcutil @@ -39,6 +32,43 @@ from .common import ( SyncTestCase, ) +T = TypeVar("T", contravariant=True) +U = TypeVar("U", covariant=True) + +def retry(stop: Callable[[], bool]) -> Callable[[Callable[[T], U]], Callable[[T], U]]: + """ + Call a function until the predicate says to stop or the function stops + raising an exception. + + :param stop: A callable to call after the decorated function raises an + exception. The decorated function will be called again if ``stop`` + returns ``False``. + + :return: A decorator function. + """ + def decorate(f: Callable[[T], U]) -> Callable[[T], U]: + @wraps(f) + def decorator(self: T) -> U: + while True: + try: + return f(self) + except Exception: + if stop(): + raise + return decorator + return decorate + +def stop_after_attempt(limit: int) -> Callable[[], bool]: + """ + Stop after ``limit`` calls. + """ + counter = 0 + def check(): + nonlocal counter + counter += 1 + return counter < limit + return check + class ListenOnUsed(unittest.TestCase): """Tests for listenOnUnused.""" @@ -127,7 +157,7 @@ class GetLocalAddressesSyncTests(SyncTestCase): IsInstance(list), AllMatch( MatchesAll( - IsInstance(native_str), + IsInstance(str), MatchesPredicate( lambda addr: socket.inet_pton(socket.AF_INET, addr), "%r is not an IPv4 address.", diff --git a/src/allmydata/test/test_istorageserver.py b/src/allmydata/test/test_istorageserver.py index 9e7e7b6e1..ded9ac1ac 100644 --- a/src/allmydata/test/test_istorageserver.py +++ b/src/allmydata/test/test_istorageserver.py @@ -8,9 +8,9 @@ reused across tests, so each test should be careful to generate unique storage indexes. """ -from future.utils import bchr +from __future__ import annotations -from typing import Set +from future.utils import bchr from random import Random from unittest import SkipTest @@ -1041,7 +1041,7 @@ class IStorageServerMutableAPIsTestsMixin(object): class _SharedMixin(SystemTestMixin): """Base class for Foolscap and HTTP mixins.""" - SKIP_TESTS = set() # type: Set[str] + SKIP_TESTS : set[str] = set() def _get_istorage_server(self): native_server = next(iter(self.clients[0].storage_broker.get_known_servers())) diff --git a/src/allmydata/test/test_storage_client.py b/src/allmydata/test/test_storage_client.py index 109122da6..0671526ae 100644 --- a/src/allmydata/test/test_storage_client.py +++ b/src/allmydata/test/test_storage_client.py @@ -1,22 +1,16 @@ """ -Ported from Python 3. +Tests for allmydata.storage_client. 
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - -from six import ensure_text +from __future__ import annotations from json import ( loads, ) - import hashlib +from typing import Union, Any + +from hyperlink import DecodedURL from fixtures import ( TempDir, ) @@ -60,6 +54,7 @@ from twisted.internet.defer import ( from twisted.python.filepath import ( FilePath, ) +from twisted.internet.task import Clock from foolscap.api import ( Tub, @@ -68,6 +63,8 @@ from foolscap.ipb import ( IConnectionHintHandler, ) +from allmydata.util.deferredutil import MultiFailure + from .no_network import LocalWrapper from .common import ( EMPTY_CLIENT_CONFIG, @@ -94,7 +91,8 @@ from allmydata.storage_client import ( StorageFarmBroker, _FoolscapStorage, _NullStorage, - ANONYMOUS_STORAGE_NURLS + _pick_a_http_server, + ANONYMOUS_STORAGE_NURLS, ) from ..storage.server import ( StorageServer, @@ -478,7 +476,7 @@ class StoragePluginWebPresence(AsyncTestCase): # config validation policy). "tub.port": tubport_endpoint, "tub.location": tubport_location, - "web.port": ensure_text(webport_endpoint), + "web.port": str(webport_endpoint), }, storage_plugin=self.storage_plugin, basedir=self.basedir, @@ -781,3 +779,61 @@ storage: StorageFarmBroker._should_we_use_http(node_config, announcement), expected_http_usage ) + + +class PickHTTPServerTests(unittest.SynchronousTestCase): + """Tests for ``_pick_a_http_server``.""" + + def pick_result(self, url_to_results: dict[DecodedURL, tuple[float, Union[Exception, Any]]]) -> Deferred[DecodedURL]: + """ + Given mapping of URLs to (delay, result), return the URL of the + first selected server, or None. + """ + clock = Clock() + + def request(reactor, url): + delay, value = url_to_results[url] + result = Deferred() + def add_result_value(): + if isinstance(value, Exception): + result.errback(value) + else: + result.callback(value) + reactor.callLater(delay, add_result_value) + return result + + d = _pick_a_http_server(clock, list(url_to_results.keys()), request) + for i in range(100): + clock.advance(0.1) + return d + + def test_first_successful_connect_is_picked(self): + """ + Given multiple good URLs, the first one that connects is chosen. + """ + earliest_url = DecodedURL.from_text("http://a") + latest_url = DecodedURL.from_text("http://b") + bad_url = DecodedURL.from_text("http://bad") + result = self.pick_result({ + latest_url: (2, None), + earliest_url: (1, None), + bad_url: (0.5, RuntimeError()), + }) + self.assertEqual(self.successResultOf(result), earliest_url) + + def test_failures_include_all_reasons(self): + """ + If all the requests fail, ``_pick_a_http_server`` raises a + ``allmydata.util.deferredutil.MultiFailure``. 
+ """ + eventually_good_url = DecodedURL.from_text("http://good") + bad_url = DecodedURL.from_text("http://bad") + exception1 = RuntimeError() + exception2 = ZeroDivisionError() + result = self.pick_result({ + eventually_good_url: (1, exception1), + bad_url: (0.1, exception2), + }) + exc = self.failureResultOf(result).value + self.assertIsInstance(exc, MultiFailure) + self.assertEqual({f.value for f in exc.failures}, {exception2, exception1}) diff --git a/src/allmydata/test/test_storage_http.py b/src/allmydata/test/test_storage_http.py index eb5bcd4db..30f6a527d 100644 --- a/src/allmydata/test/test_storage_http.py +++ b/src/allmydata/test/test_storage_http.py @@ -34,7 +34,7 @@ from hyperlink import DecodedURL from collections_extended import RangeMap from twisted.internet.task import Clock, Cooperator from twisted.internet.interfaces import IReactorTime, IReactorFromThreads -from twisted.internet.defer import CancelledError, Deferred +from twisted.internet.defer import CancelledError, Deferred, ensureDeferred from twisted.web import http from twisted.web.http_headers import Headers from werkzeug import routing @@ -43,7 +43,11 @@ from testtools.matchers import Equals from zope.interface import implementer from .common import SyncTestCase -from ..storage.http_common import get_content_type, CBOR_MIME_TYPE +from ..storage.http_common import ( + get_content_type, + CBOR_MIME_TYPE, + response_is_not_html, +) from ..storage.common import si_b2a from ..storage.lease import LeaseInfo from ..storage.server import StorageServer @@ -54,9 +58,11 @@ from ..storage.http_server import ( ClientSecretsException, _authorized_route, StorageIndexConverter, + _add_error_handling, ) from ..storage.http_client import ( StorageClient, + StorageClientFactory, ClientException, StorageClientImmutables, ImmutableCreateResult, @@ -253,8 +259,13 @@ class TestApp(object): clock: IReactorTime _app = Klein() + _add_error_handling(_app) _swissnum = SWISSNUM_FOR_TEST # Match what the test client is using + @_authorized_route(_app, {}, "/noop", methods=["GET"]) + def noop(self, request, authorization): + return "noop" + @_authorized_route(_app, {Secrets.UPLOAD}, "/upload_secret", methods=["GET"]) def validate_upload_secret(self, request, authorization): if authorization == {Secrets.UPLOAD: b"MAGIC"}: @@ -309,7 +320,6 @@ def result_of(d): + "This is probably a test design issue." ) - class CustomHTTPServerTests(SyncTestCase): """ Tests that use a custom HTTP server. @@ -317,10 +327,10 @@ class CustomHTTPServerTests(SyncTestCase): def setUp(self): super(CustomHTTPServerTests, self).setUp() - StorageClient.start_test_mode( + StorageClientFactory.start_test_mode( lambda pool: self.addCleanup(pool.closeCachedConnections) ) - self.addCleanup(StorageClient.stop_test_mode) + self.addCleanup(StorageClientFactory.stop_test_mode) # Could be a fixture, but will only be used in this test class so not # going to bother: self._http_server = TestApp() @@ -329,24 +339,65 @@ class CustomHTTPServerTests(SyncTestCase): DecodedURL.from_text("http://127.0.0.1"), SWISSNUM_FOR_TEST, treq=treq, + pool=None, # We're using a Treq private API to get the reactor, alas, but only # in a test, so not going to worry about it too much. This would be # fixed if https://github.com/twisted/treq/issues/226 were ever # fixed. 
clock=treq._agent._memoryReactor, + analyze_response=response_is_not_html, ) self._http_server.clock = self.client._clock + def test_bad_swissnum_from_client(self) -> None: + """ + If the swissnum is invalid, a BAD REQUEST response code is returned. + """ + headers = Headers() + # The value is not UTF-8. + headers.addRawHeader("Authorization", b"\x00\xFF\x00\xFF") + response = result_of( + self.client._treq.request( + "GET", + DecodedURL.from_text("http://127.0.0.1/noop"), + headers=headers, + ) + ) + self.assertEqual(response.code, 400) + + def test_bad_secret(self) -> None: + """ + If the secret is invalid (not base64), a BAD REQUEST + response code is returned. + """ + bad_secret = b"upload-secret []<>" + headers = Headers() + headers.addRawHeader( + "X-Tahoe-Authorization", + bad_secret, + ) + response = result_of( + self.client.request( + "GET", + DecodedURL.from_text("http://127.0.0.1/upload_secret"), + headers=headers, + ) + ) + self.assertEqual(response.code, 400) + def test_authorization_enforcement(self): """ The requirement for secrets is enforced by the ``_authorized_route`` decorator; if they are not given, a 400 response code is returned. + + Note that this refers to ``X-Tahoe-Authorization``, not the + ``Authorization`` header used for the swissnum. """ # Without secret, get a 400 error. response = result_of( self.client.request( "GET", - "http://127.0.0.1/upload_secret", + DecodedURL.from_text("http://127.0.0.1/upload_secret"), ) ) self.assertEqual(response.code, 400) @@ -354,7 +405,9 @@ class CustomHTTPServerTests(SyncTestCase): # With secret, we're good. response = result_of( self.client.request( - "GET", "http://127.0.0.1/upload_secret", upload_secret=b"MAGIC" + "GET", + DecodedURL.from_text("http://127.0.0.1/upload_secret"), + upload_secret=b"MAGIC", ) ) self.assertEqual(response.code, 200) @@ -378,7 +431,7 @@ class CustomHTTPServerTests(SyncTestCase): response = result_of( self.client.request( "GET", - f"http://127.0.0.1/bytes/{length}", + DecodedURL.from_text(f"http://127.0.0.1/bytes/{length}"), ) ) @@ -399,7 +452,7 @@ class CustomHTTPServerTests(SyncTestCase): response = result_of( self.client.request( "GET", - f"http://127.0.0.1/bytes/{length}", + DecodedURL.from_text(f"http://127.0.0.1/bytes/{length}"), ) ) @@ -414,7 +467,7 @@ class CustomHTTPServerTests(SyncTestCase): response = result_of( self.client.request( "GET", - "http://127.0.0.1/slowly_never_finish_result", + DecodedURL.from_text("http://127.0.0.1/slowly_never_finish_result"), ) ) @@ -442,7 +495,7 @@ class CustomHTTPServerTests(SyncTestCase): response = result_of( self.client.request( "GET", - "http://127.0.0.1/die", + DecodedURL.from_text("http://127.0.0.1/die"), ) ) @@ -459,12 +512,13 @@ class Reactor(Clock): Advancing the clock also runs any callbacks scheduled via callFromThread. 
""" + def __init__(self): Clock.__init__(self) self._queue = Queue() - def callFromThread(self, f, *args, **kwargs): - self._queue.put((f, args, kwargs)) + def callFromThread(self, callable, *args, **kwargs): + self._queue.put((callable, args, kwargs)) def advance(self, *args, **kwargs): Clock.advance(self, *args, **kwargs) @@ -480,10 +534,10 @@ class HttpTestFixture(Fixture): """ def _setUp(self): - StorageClient.start_test_mode( + StorageClientFactory.start_test_mode( lambda pool: self.addCleanup(pool.closeCachedConnections) ) - self.addCleanup(StorageClient.stop_test_mode) + self.addCleanup(StorageClientFactory.stop_test_mode) self.clock = Reactor() self.tempdir = self.useFixture(TempDir()) # The global Cooperator used by Twisted (a) used by pull producers in @@ -499,13 +553,17 @@ class HttpTestFixture(Fixture): self.storage_server = StorageServer( self.tempdir.path, b"\x00" * 20, clock=self.clock ) - self.http_server = HTTPServer(self.clock, self.storage_server, SWISSNUM_FOR_TEST) + self.http_server = HTTPServer( + self.clock, self.storage_server, SWISSNUM_FOR_TEST + ) self.treq = StubTreq(self.http_server.get_resource()) self.client = StorageClient( DecodedURL.from_text("http://127.0.0.1"), SWISSNUM_FOR_TEST, treq=self.treq, + pool=None, clock=self.clock, + analyze_response=response_is_not_html, ) def result_of_with_flush(self, d): @@ -513,6 +571,7 @@ class HttpTestFixture(Fixture): Like ``result_of``, but supports fake reactor and ``treq`` testing infrastructure necessary to support asynchronous HTTP server endpoints. """ + d = ensureDeferred(d) result = [] error = [] d.addCallbacks(result.append, error.append) @@ -616,7 +675,9 @@ class GenericHTTPAPITests(SyncTestCase): DecodedURL.from_text("http://127.0.0.1"), b"something wrong", treq=StubTreq(self.http.http_server.get_resource()), + pool=None, clock=self.http.clock, + analyze_response=response_is_not_html, ) ) with assert_fails_with_http_code(self, http.UNAUTHORIZED): @@ -1447,7 +1508,7 @@ class SharedImmutableMutableTestsMixin: self.client.advise_corrupt_share(storage_index, 13, reason) ) - for (si, share_number) in [(storage_index, 11), (urandom(16), 13)]: + for si, share_number in [(storage_index, 11), (urandom(16), 13)]: with assert_fails_with_http_code(self, http.NOT_FOUND): self.http.result_of_with_flush( self.client.advise_corrupt_share(si, share_number, reason) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index d11a6e866..18ac6c6e6 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -1,20 +1,13 @@ """ Ported to Python 3. """ -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - # Don't import bytes since it causes issues on (so far unported) modules on Python 2. 
-    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, max, min, str  # noqa: F401
+from __future__ import annotations

 from past.builtins import chr as byteschr, long
 from six import ensure_text

 import os, re, sys, time, json
+from typing import Optional

 from bs4 import BeautifulSoup

@@ -56,10 +49,12 @@ from .common_util import run_cli_unicode

 class RunBinTahoeMixin(object):
-    def run_bintahoe(self, args, stdin=None, python_options=[], env=None):
+    def run_bintahoe(self, args, stdin=None, python_options: Optional[list[str]] = None, env=None):
         # test_runner.run_bintahoe has better unicode support but doesn't
         # support env yet and is also synchronous.  If we could get rid of
         # this in favor of that, though, it would probably be an improvement.
+        if python_options is None:
+            python_options = []
         command = sys.executable
         argv = python_options + ["-b", "-m", "allmydata.scripts.runner"] + args

@@ -787,7 +782,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):

     def test_filesystem(self):
         self.data = LARGE_DATA
-        d = self.set_up_nodes()
+        d = self.set_up_nodes(2)
         def _new_happy_semantics(ign):
             for c in self.clients:
                 c.encoding_params['happy'] = 1
@@ -1088,7 +1083,9 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
         headers["content-type"] = "multipart/form-data; boundary=%s" % str(sepbase, "ascii")
         return self.POST2(urlpath, body, headers, use_helper)

-    def POST2(self, urlpath, body=b"", headers={}, use_helper=False):
+    def POST2(self, urlpath, body=b"", headers=None, use_helper=False):
+        if headers is None:
+            headers = {}
         if use_helper:
             url = self.helper_webish_url + urlpath
         else:
@@ -1409,7 +1406,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):
             rc,out,err = yield run_cli(verb, *args, nodeargs=nodeargs, **kwargs)
             defer.returnValue((out,err))

-        def _check_ls(out_and_err, expected_children, unexpected_children=[]):
+        def _check_ls(out_and_err, expected_children, unexpected_children=()):
             (out, err) = out_and_err
             self.failUnlessEqual(err, "")
             for s in expected_children:
@@ -1749,6 +1746,10 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase):

         return d

+    # In CI this test can be very slow, so give it a longer timeout:
+    test_filesystem.timeout = 360  # type: ignore[attr-defined]
+
+
     def test_filesystem_with_cli_in_subprocess(self):
         # We do this in a separate test so that test_filesystem doesn't skip if we can't run bin/tahoe.

diff --git a/src/allmydata/test/test_tor_provider.py b/src/allmydata/test/test_tor_provider.py
index 86d54803a..20f947d55 100644
--- a/src/allmydata/test/test_tor_provider.py
+++ b/src/allmydata/test/test_tor_provider.py
@@ -1,14 +1,6 @@
 """
 Ported to Python 3.
""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 import os from twisted.trial import unittest @@ -94,16 +86,15 @@ class LaunchTor(unittest.TestCase): reactor = object() private_dir = "private" txtorcon = mock.Mock() - tpp = mock.Mock - tpp.tor_protocol = mock.Mock() - txtorcon.launch_tor = mock.Mock(return_value=tpp) + tor = mock.Mock + txtorcon.launch = mock.Mock(return_value=tor) with mock.patch("allmydata.util.tor_provider.allocate_tcp_port", return_value=999999): d = tor_provider._launch_tor(reactor, tor_executable, private_dir, txtorcon) - tor_control_endpoint, tor_control_proto = self.successResultOf(d) - self.assertIs(tor_control_proto, tpp.tor_protocol) + tor_control_endpoint, tor_result = self.successResultOf(d) + self.assertIs(tor_result, tor) def test_launch(self): return self._do_test_launch(None) @@ -161,6 +152,12 @@ class ConnectToTor(unittest.TestCase): return self._do_test_connect(None, False) +class FakeTor: + """Pretends to be a ``txtorcon.Tor`` instance.""" + def __init__(self): + self.protocol = object() + + class CreateOnion(unittest.TestCase): def test_no_txtorcon(self): with mock.patch("allmydata.util.tor_provider._import_txtorcon", @@ -171,6 +168,7 @@ class CreateOnion(unittest.TestCase): self.assertEqual(str(f.value), "Cannot create onion without txtorcon. " "Please 'pip install tahoe-lafs[tor]' to fix this.") + def _do_test_launch(self, executable): basedir = self.mktemp() os.mkdir(basedir) @@ -181,9 +179,9 @@ class CreateOnion(unittest.TestCase): if executable: args.append("--tor-executable=%s" % executable) cli_config = make_cli_config(basedir, *args) - protocol = object() + tor_instance = FakeTor() launch_tor = mock.Mock(return_value=defer.succeed(("control_endpoint", - protocol))) + tor_instance))) txtorcon = mock.Mock() ehs = mock.Mock() # This appears to be a native string in the real txtorcon object... 
@@ -204,8 +202,8 @@ class CreateOnion(unittest.TestCase): launch_tor.assert_called_with(reactor, executable, os.path.abspath(private_dir), txtorcon) txtorcon.EphemeralHiddenService.assert_called_with("3457 127.0.0.1:999999") - ehs.add_to_tor.assert_called_with(protocol) - ehs.remove_from_tor.assert_called_with(protocol) + ehs.add_to_tor.assert_called_with(tor_instance.protocol) + ehs.remove_from_tor.assert_called_with(tor_instance.protocol) expected = {"launch": "true", "onion": "true", @@ -587,13 +585,14 @@ class Provider_Service(unittest.TestCase): txtorcon = mock.Mock() with mock_txtorcon(txtorcon): p = tor_provider.create(reactor, cfg) + tor_instance = FakeTor() tor_state = mock.Mock() - tor_state.protocol = object() + tor_state.protocol = tor_instance.protocol ehs = mock.Mock() ehs.add_to_tor = mock.Mock(return_value=defer.succeed(None)) ehs.remove_from_tor = mock.Mock(return_value=defer.succeed(None)) txtorcon.EphemeralHiddenService = mock.Mock(return_value=ehs) - launch_tor = mock.Mock(return_value=defer.succeed((None,tor_state.protocol))) + launch_tor = mock.Mock(return_value=defer.succeed((None,tor_instance))) with mock.patch("allmydata.util.tor_provider._launch_tor", launch_tor): d = p.startService() @@ -628,9 +627,8 @@ class Provider_Service(unittest.TestCase): txtorcon = mock.Mock() with mock_txtorcon(txtorcon): p = tor_provider.create(reactor, cfg) - tor_state = mock.Mock() - tor_state.protocol = object() - txtorcon.build_tor_connection = mock.Mock(return_value=tor_state) + tor_instance = FakeTor() + txtorcon.connect = mock.Mock(return_value=tor_instance) ehs = mock.Mock() ehs.add_to_tor = mock.Mock(return_value=defer.succeed(None)) ehs.remove_from_tor = mock.Mock(return_value=defer.succeed(None)) @@ -642,12 +640,12 @@ class Provider_Service(unittest.TestCase): yield flushEventualQueue() self.successResultOf(d) self.assertIs(p._onion_ehs, ehs) - self.assertIs(p._onion_tor_control_proto, tor_state.protocol) + self.assertIs(p._onion_tor_control_proto, tor_instance.protocol) cfs.assert_called_with(reactor, "ep_desc") - txtorcon.build_tor_connection.assert_called_with(tcep) + txtorcon.connect.assert_called_with(reactor, tcep) txtorcon.EphemeralHiddenService.assert_called_with("456 127.0.0.1:123", b"private key") - ehs.add_to_tor.assert_called_with(tor_state.protocol) + ehs.add_to_tor.assert_called_with(tor_instance.protocol) yield p.stopService() - ehs.remove_from_tor.assert_called_with(tor_state.protocol) + ehs.remove_from_tor.assert_called_with(tor_instance.protocol) diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py index 4c828817a..08dce0ac0 100644 --- a/src/allmydata/test/web/test_web.py +++ b/src/allmydata/test/web/test_web.py @@ -565,7 +565,9 @@ class WebMixin(TimezoneMixin): returnValue(data) @inlineCallbacks - def HEAD(self, urlpath, return_response=False, headers={}): + def HEAD(self, urlpath, return_response=False, headers=None): + if headers is None: + headers = {} url = self.webish_url + urlpath response = yield treq.request("head", url, persistent=False, headers=headers) @@ -573,7 +575,9 @@ class WebMixin(TimezoneMixin): raise Error(response.code, response="") returnValue( ("", response.code, response.headers) ) - def PUT(self, urlpath, data, headers={}): + def PUT(self, urlpath, data, headers=None): + if headers is None: + headers = {} url = self.webish_url + urlpath return do_http("put", url, data=data, headers=headers) @@ -618,7 +622,9 @@ class WebMixin(TimezoneMixin): body, headers = self.build_form(**fields) return 
self.POST2(urlpath, body, headers) - def POST2(self, urlpath, body="", headers={}, followRedirect=False): + def POST2(self, urlpath, body="", headers=None, followRedirect=False): + if headers is None: + headers = {} url = self.webish_url + urlpath if isinstance(body, str): body = body.encode("utf-8") diff --git a/src/allmydata/testing/web.py b/src/allmydata/testing/web.py index 4f68b3774..95e92825b 100644 --- a/src/allmydata/testing/web.py +++ b/src/allmydata/testing/web.py @@ -276,6 +276,15 @@ class _SynchronousProducer(object): consumer.write(self.body) return succeed(None) + def stopProducing(self): + pass + + def pauseProducing(self): + pass + + def resumeProducing(self): + pass + def create_tahoe_treq_client(root=None): """ diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py index 5641771d3..fccf05db9 100644 --- a/src/allmydata/uri.py +++ b/src/allmydata/uri.py @@ -6,26 +6,11 @@ Ported to Python 3. Methods ending in to_string() are actually to_bytes(), possibly should be fixed in follow-up port. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - # Don't import bytes or str, to prevent future's newbytes leaking and - # breaking code that only expects normal bytes. - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, max, min # noqa: F401 - from past.builtins import unicode as str from past.builtins import unicode, long import re - -try: - from typing import Type -except ImportError: - pass +from typing import Type from zope.interface import implementer from twisted.python.components import registerAdapter @@ -707,7 +692,7 @@ class DirectoryURIVerifier(_DirectoryBaseURI): BASE_STRING=b'URI:DIR2-Verifier:' BASE_STRING_RE=re.compile(b'^'+BASE_STRING) - INNER_URI_CLASS=SSKVerifierURI # type: Type[IVerifierURI] + INNER_URI_CLASS : Type[IVerifierURI] = SSKVerifierURI def __init__(self, filenode_uri=None): if filenode_uri: diff --git a/src/allmydata/util/base32.py b/src/allmydata/util/base32.py index ab65beeac..19a3bbe26 100644 --- a/src/allmydata/util/base32.py +++ b/src/allmydata/util/base32.py @@ -3,30 +3,11 @@ Base32 encoding. Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 - -if PY2: - def backwardscompat_bytes(b): - """ - Replace Future bytes with native Python 2 bytes, so % works - consistently until other modules are ported. - """ - return getattr(b, "__native__", lambda: b)() - import string - maketrans = string.maketrans -else: - def backwardscompat_bytes(b): - return b - maketrans = bytes.maketrans - from typing import Optional +def backwardscompat_bytes(b): + return b +maketrans = bytes.maketrans +from typing import Optional import base64 from allmydata.util.assertutil import precondition @@ -34,7 +15,7 @@ from allmydata.util.assertutil import precondition rfc3548_alphabet = b"abcdefghijklmnopqrstuvwxyz234567" # RFC3548 standard used by Gnutella, Content-Addressable Web, THEX, Bitzi, Web-Calculus... 
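+# For example, under this alphabet b2a(b"hello") == b"nbswy3dp" (standard
+# base32, lower-cased, with the trailing "=" padding stripped).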
chars = rfc3548_alphabet -vals = backwardscompat_bytes(bytes(range(32))) +vals = bytes(range(32)) c2vtranstable = maketrans(chars, vals) v2ctranstable = maketrans(vals, chars) identitytranstable = maketrans(b'', b'') @@ -61,16 +42,16 @@ def get_trailing_chars_without_lsbs(N): d = {} return b''.join(_get_trailing_chars_without_lsbs(N, d=d)) -BASE32CHAR = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(0)+b']') -BASE32CHAR_4bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(1)+b']') -BASE32CHAR_3bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(2)+b']') -BASE32CHAR_2bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(3)+b']') -BASE32CHAR_1bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(4)+b']') -BASE32STR_1byte = backwardscompat_bytes(BASE32CHAR+BASE32CHAR_3bits) -BASE32STR_2bytes = backwardscompat_bytes(BASE32CHAR+b'{3}'+BASE32CHAR_1bits) -BASE32STR_3bytes = backwardscompat_bytes(BASE32CHAR+b'{4}'+BASE32CHAR_4bits) -BASE32STR_4bytes = backwardscompat_bytes(BASE32CHAR+b'{6}'+BASE32CHAR_2bits) -BASE32STR_anybytes = backwardscompat_bytes(bytes(b'((?:%s{8})*') % (BASE32CHAR,) + bytes(b"(?:|%s|%s|%s|%s))") % (BASE32STR_1byte, BASE32STR_2bytes, BASE32STR_3bytes, BASE32STR_4bytes)) +BASE32CHAR = b'['+get_trailing_chars_without_lsbs(0)+b']' +BASE32CHAR_4bits = b'['+get_trailing_chars_without_lsbs(1)+b']' +BASE32CHAR_3bits = b'['+get_trailing_chars_without_lsbs(2)+b']' +BASE32CHAR_2bits = b'['+get_trailing_chars_without_lsbs(3)+b']' +BASE32CHAR_1bits = b'['+get_trailing_chars_without_lsbs(4)+b']' +BASE32STR_1byte = BASE32CHAR+BASE32CHAR_3bits +BASE32STR_2bytes = BASE32CHAR+b'{3}'+BASE32CHAR_1bits +BASE32STR_3bytes = BASE32CHAR+b'{4}'+BASE32CHAR_4bits +BASE32STR_4bytes = BASE32CHAR+b'{6}'+BASE32CHAR_2bits +BASE32STR_anybytes = bytes(b'((?:%s{8})*') % (BASE32CHAR,) + bytes(b"(?:|%s|%s|%s|%s))") % (BASE32STR_1byte, BASE32STR_2bytes, BASE32STR_3bytes, BASE32STR_4bytes) def b2a(os): # type: (bytes) -> bytes """ @@ -80,7 +61,7 @@ def b2a(os): # type: (bytes) -> bytes """ return base64.b32encode(os).rstrip(b"=").lower() -def b2a_or_none(os): # type: (Optional[bytes]) -> Optional[bytes] +def b2a_or_none(os: Optional[bytes]) -> Optional[bytes]: if os is not None: return b2a(os) return None @@ -100,8 +81,6 @@ NUM_OS_TO_NUM_QS=(0, 2, 4, 5, 7,) NUM_QS_TO_NUM_OS=(0, 1, 1, 2, 2, 3, 3, 4) NUM_QS_LEGIT=(1, 0, 1, 0, 1, 1, 0, 1,) NUM_QS_TO_NUM_BITS=tuple([_x*8 for _x in NUM_QS_TO_NUM_OS]) -if PY2: - del _x # A fast way to determine whether a given string *could* be base-32 encoded data, assuming that the # original data had 8K bits for a positive integer K. @@ -135,8 +114,6 @@ def a2b(cs): # type: (bytes) -> bytes """ @param cs the base-32 encoded data (as bytes) """ - # Workaround Future newbytes issues by converting to real bytes on Python 2: - cs = backwardscompat_bytes(cs) precondition(could_be_base32_encoded(cs), "cs is required to be possibly base32 encoded data.", cs=cs) precondition(isinstance(cs, bytes), cs) @@ -144,9 +121,8 @@ def a2b(cs): # type: (bytes) -> bytes # Add padding back, to make Python's base64 module happy: while (len(cs) * 5) % 8 != 0: cs += b"=" - # Let newbytes come through and still work on Python 2, where the base64 - # module gets confused by them. 
- return base64.b32decode(backwardscompat_bytes(cs)) + + return base64.b32decode(cs) __all__ = ["b2a", "a2b", "b2a_or_none", "BASE32CHAR_3bits", "BASE32CHAR_1bits", "BASE32CHAR", "BASE32STR_anybytes", "could_be_base32_encoded"] diff --git a/src/allmydata/util/connection_status.py b/src/allmydata/util/connection_status.py index 0e8595e81..0ccdcd672 100644 --- a/src/allmydata/util/connection_status.py +++ b/src/allmydata/util/connection_status.py @@ -1,21 +1,13 @@ """ Parse connection status from Foolscap. - -Ported to Python 3. """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations import time from zope.interface import implementer from ..interfaces import IConnectionStatus +from foolscap.reconnector import Reconnector @implementer(IConnectionStatus) class ConnectionStatus(object): @@ -41,7 +33,7 @@ class ConnectionStatus(object): last_received_time=None, ) -def _hint_statuses(which, handlers, statuses): +def _hint_statuses(which, handlers, statuses) -> dict[str, str]: non_connected_statuses = {} for hint in which: handler = handlers.get(hint) @@ -50,7 +42,7 @@ def _hint_statuses(which, handlers, statuses): non_connected_statuses["%s%s" % (hint, handler_dsc)] = dsc return non_connected_statuses -def from_foolscap_reconnector(rc, last_received): +def from_foolscap_reconnector(rc: Reconnector, last_received: int, time=time.time) -> ConnectionStatus: ri = rc.getReconnectionInfo() # See foolscap/reconnector.py, ReconnectionInfo, for details about possible # states. The returned result is a native string, it seems, so convert to @@ -80,7 +72,7 @@ def from_foolscap_reconnector(rc, last_received): # ci describes the current in-progress attempt summary = "Trying to connect" elif state == "waiting": - now = time.time() + now = time() elapsed = now - ri.lastAttempt delay = ri.nextAttempt - now summary = "Reconnecting in %d seconds (last attempt %ds ago)" % \ diff --git a/src/allmydata/util/dbutil.py b/src/allmydata/util/dbutil.py index 916382972..45e59cf00 100644 --- a/src/allmydata/util/dbutil.py +++ b/src/allmydata/util/dbutil.py @@ -25,7 +25,7 @@ class DBError(Exception): def get_db(dbfile, stderr=sys.stderr, - create_version=(None, None), updaters={}, just_create=False, dbname="db", + create_version=(None, None), updaters=None, just_create=False, dbname="db", ): """Open or create the given db file. The parent directory must exist. create_version=(SCHEMA, VERNUM), and SCHEMA must have a 'version' table. @@ -33,6 +33,8 @@ def get_db(dbfile, stderr=sys.stderr, to get from ver=1 to ver=2. Returns a (sqlite3,db) tuple, or raises DBError. """ + if updaters is None: + updaters = {} must_create = not os.path.exists(dbfile) try: db = sqlite3.connect(dbfile) diff --git a/src/allmydata/util/deferredutil.py b/src/allmydata/util/deferredutil.py index 782663e8b..9e8d7bad4 100644 --- a/src/allmydata/util/deferredutil.py +++ b/src/allmydata/util/deferredutil.py @@ -1,22 +1,29 @@ """ Utilities for working with Twisted Deferreds. - -Ported to Python 3. 
""" +from __future__ import annotations + import time from functools import wraps from typing import ( Callable, Any, + Sequence, + TypeVar, + Optional, + Coroutine, + Generator ) +from typing_extensions import ParamSpec from foolscap.api import eventually from eliot.twisted import ( inline_callbacks, ) from twisted.internet import defer, reactor, error +from twisted.internet.defer import Deferred from twisted.python.failure import Failure from allmydata.util import log @@ -204,10 +211,9 @@ class WaitForDelayedCallsMixin(PollMixin): @inline_callbacks def until( - action, # type: Callable[[], defer.Deferred[Any]] - condition, # type: Callable[[], bool] -): - # type: (...) -> defer.Deferred[None] + action: Callable[[], defer.Deferred[Any]], + condition: Callable[[], bool], +) -> Generator[Any, None, None]: """ Run a Deferred-returning function until a condition is true. @@ -222,7 +228,11 @@ def until( break -def async_to_deferred(f): +P = ParamSpec("P") +R = TypeVar("R") + + +def async_to_deferred(f: Callable[P, Coroutine[defer.Deferred[R], None, R]]) -> Callable[P, Deferred[R]]: """ Wrap an async function to return a Deferred instead. @@ -230,7 +240,99 @@ def async_to_deferred(f): """ @wraps(f) - def not_async(*args, **kwargs): + def not_async(*args: P.args, **kwargs: P.kwargs) -> Deferred[R]: return defer.Deferred.fromCoroutine(f(*args, **kwargs)) return not_async + + +class MultiFailure(Exception): + """ + More than one failure occurred. + """ + + def __init__(self, failures: Sequence[Failure]) -> None: + super(MultiFailure, self).__init__() + self.failures = failures + + +_T = TypeVar("_T") + +# Eventually this should be in Twisted upstream: +# https://github.com/twisted/twisted/pull/11818 +def race(ds: Sequence[Deferred[_T]]) -> Deferred[tuple[int, _T]]: + """ + Select the first available result from the sequence of Deferreds and + cancel the rest. + @return: A cancellable L{Deferred} that fires with the index and output of + the element of C{ds} to have a success result first, or that fires + with L{MultiFailure} holding a list of their failures if they all + fail. + """ + # Keep track of the Deferred for the action which completed first. When + # it completes, all of the other Deferreds will get cancelled but this one + # shouldn't be. Even though it "completed" it isn't really done - the + # caller will still be using it for something. If we cancelled it, + # cancellation could propagate down to them. + winner: Optional[Deferred] = None + + # The cancellation function for the Deferred this function returns. + def cancel(result: Deferred) -> None: + # If it is cancelled then we cancel all of the Deferreds for the + # individual actions because there is no longer the possibility of + # delivering any of their results anywhere. We don't have to fire + # `result` because the Deferred will do that for us. + for d in to_cancel: + d.cancel() + + # The Deferred that this function will return. It will fire with the + # index and output of the action that completes first, or None if all of + # the actions fail. If it is cancelled, all of the actions will be + # cancelled. + final_result: Deferred[tuple[int, _T]] = Deferred(canceller=cancel) + + # A callback for an individual action. + def succeeded(this_output: _T, this_index: int) -> None: + # If it is the first action to succeed then it becomes the "winner", + # its index/output become the externally visible result, and the rest + # of the action Deferreds get cancelled. 
If it is not the first + # action to succeed (because some action did not support + # cancellation), just ignore the result. It is uncommon for this + # callback to be entered twice. The only way it can happen is if one + # of the input Deferreds has a cancellation function that fires the + # Deferred with a success result. + nonlocal winner + if winner is None: + # This is the first success. Act on it. + winner = to_cancel[this_index] + + # Cancel the rest. + for d in to_cancel: + if d is not winner: + d.cancel() + + # Fire our Deferred + final_result.callback((this_index, this_output)) + + # Keep track of how many actions have failed. If they all fail we need to + # deliver failure notification on our externally visible result. + failure_state = [] + + def failed(failure: Failure, this_index: int) -> None: + failure_state.append((this_index, failure)) + if len(failure_state) == len(to_cancel): + # Every operation failed. + failure_state.sort() + failures = [f for (ignored, f) in failure_state] + final_result.errback(MultiFailure(failures)) + + # Copy the sequence of Deferreds so we know it doesn't get mutated out + # from under us. + to_cancel = list(ds) + for index, d in enumerate(ds): + # Propagate the position of this action as well as the argument to f + # to the success callback so we can cancel the right Deferreds and + # propagate the result outwards. + d.addCallbacks(succeeded, failed, callbackArgs=(index,), errbackArgs=(index,)) + + return final_result diff --git a/src/allmydata/util/pollmixin.py b/src/allmydata/util/pollmixin.py index 582bafe86..b23277565 100644 --- a/src/allmydata/util/pollmixin.py +++ b/src/allmydata/util/pollmixin.py @@ -4,22 +4,10 @@ Polling utility that returns Deferred. Ported to Python 3. """ -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from future.utils import PY2 -if PY2: - from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations import time -try: - from typing import List -except ImportError: - pass - from twisted.internet import task class TimeoutError(Exception): @@ -29,7 +17,7 @@ class PollComplete(Exception): pass class PollMixin(object): - _poll_should_ignore_these_errors = [] # type: List[Exception] + _poll_should_ignore_these_errors : list[Exception] = [] def poll(self, check_f, pollinterval=0.01, timeout=1000): # Return a Deferred, then call check_f periodically until it returns diff --git a/src/allmydata/util/tor_provider.py b/src/allmydata/util/tor_provider.py index 4ca19c01c..aaf43db73 100644 --- a/src/allmydata/util/tor_provider.py +++ b/src/allmydata/util/tor_provider.py @@ -2,14 +2,10 @@ """ Ported to Python 3. 
""" -from __future__ import absolute_import, print_function, with_statement -from __future__ import division -from __future__ import unicode_literals -from future.utils import PY2 -if PY2: - from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from __future__ import annotations +from typing import Optional import os from zope.interface import ( @@ -27,6 +23,7 @@ from ..interfaces import ( IAddressFamily, ) + def _import_tor(): try: from foolscap.connections import tor @@ -41,7 +38,7 @@ def _import_txtorcon(): except ImportError: # pragma: no cover return None -def create(reactor, config, import_tor=None, import_txtorcon=None): +def create(reactor, config, import_tor=None, import_txtorcon=None) -> Optional[_Provider]: """ Create a new _Provider service (this is an IService so must be hooked up to a parent or otherwise started). @@ -98,33 +95,31 @@ def _try_to_connect(reactor, endpoint_desc, stdout, txtorcon): @inlineCallbacks def _launch_tor(reactor, tor_executable, private_dir, txtorcon): + """ + Launches Tor, returns a corresponding ``(control endpoint string, + txtorcon.Tor instance)`` tuple. + """ # TODO: handle default tor-executable # TODO: it might be a good idea to find exactly which Tor we used, # and record it's absolute path into tahoe.cfg . This would protect # us against one Tor being on $PATH at create-node time, but then a # different Tor being present at node startup. OTOH, maybe we don't # need to worry about it. - tor_config = txtorcon.TorConfig() - tor_config.DataDirectory = data_directory(private_dir) # unix-domain control socket - tor_config.ControlPort = "unix:" + os.path.join(private_dir, "tor.control") - tor_control_endpoint_desc = tor_config.ControlPort + tor_control_endpoint_desc = "unix:" + os.path.join(private_dir, "tor.control") - tor_config.SOCKSPort = allocate_tcp_port() - - tpp = yield txtorcon.launch_tor( - tor_config, reactor, + tor = yield txtorcon.launch( + reactor, + control_port=tor_control_endpoint_desc, + data_directory=data_directory(private_dir), tor_binary=tor_executable, + socks_port=allocate_tcp_port(), # can be useful when debugging; mirror Tor's output to ours # stdout=sys.stdout, # stderr=sys.stderr, ) - # now tor is launched and ready to be spoken to - # as a side effect, we've got an ITorControlProtocol ready to go - tor_control_proto = tpp.tor_protocol - # How/when to shut down the new process? for normal usage, the child # tor will exit when it notices its parent (us) quit. Unit tests will # mock out txtorcon.launch_tor(), so there will never be a real Tor @@ -134,7 +129,8 @@ def _launch_tor(reactor, tor_executable, private_dir, txtorcon): # (because it's a TorProcessProtocol) which returns a Deferred # that fires when Tor has actually exited. 
- returnValue((tor_control_endpoint_desc, tor_control_proto)) + returnValue((tor_control_endpoint_desc, tor)) + @inlineCallbacks def _connect_to_tor(reactor, cli_config, txtorcon): @@ -169,8 +165,9 @@ def create_config(reactor, cli_config): if tor_executable: tahoe_config_tor["tor.executable"] = tor_executable print("launching Tor (to allocate .onion address)..", file=stdout) - (_, tor_control_proto) = yield _launch_tor( + (_, tor) = yield _launch_tor( reactor, tor_executable, private_dir, txtorcon) + tor_control_proto = tor.protocol print("Tor launched", file=stdout) else: print("connecting to Tor (to allocate .onion address)..", file=stdout) @@ -294,7 +291,7 @@ class _Provider(service.MultiService): returnValue(tor_control_endpoint) def _get_launched_tor(self, reactor): - # this fires with a tuple of (control_endpoint, tor_protocol) + # this fires with a tuple of (control_endpoint, txtorcon.Tor instance) if not self._tor_launched: self._tor_launched = OneShotObserverList() private_dir = self._config.get_config_path("private") @@ -325,17 +322,20 @@ class _Provider(service.MultiService): require("external_port") require("private_key_file") - @inlineCallbacks - def _start_onion(self, reactor): + def get_tor_instance(self, reactor: object): + """Return a ``Deferred`` that fires with a ``txtorcon.Tor`` instance.""" # launch tor, if necessary if self._get_tor_config("launch", False, boolean=True): - (_, tor_control_proto) = yield self._get_launched_tor(reactor) + return self._get_launched_tor(reactor).addCallback(lambda t: t[1]) else: controlport = self._get_tor_config("control.port", None) tcep = clientFromString(reactor, controlport) - tor_state = yield self._txtorcon.build_tor_connection(tcep) - tor_control_proto = tor_state.protocol + return self._txtorcon.connect(reactor, tcep) + @inlineCallbacks + def _start_onion(self, reactor): + tor_instance = yield self.get_tor_instance(reactor) + tor_control_proto = tor_instance.protocol local_port = int(self._get_tor_config("onion.local_port")) external_port = int(self._get_tor_config("onion.external_port")) diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index 3d85b1c4d..1a0ba433b 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -87,6 +87,7 @@ from allmydata.util.encodingutil import ( from allmydata.util import abbreviate from allmydata.crypto.rsa import PrivateKey, PublicKey, create_signing_keypair_from_string + class WebError(Exception): def __init__(self, text, code=http.BAD_REQUEST): self.text = text @@ -117,7 +118,7 @@ def boolean_of_arg(arg): # type: (bytes) -> bool return arg.lower() in (b"true", b"t", b"1", b"on") -def parse_replace_arg(replace): # type: (bytes) -> Union[bool,_OnlyFiles] +def parse_replace_arg(replace: bytes) -> Union[bool,_OnlyFiles]: assert isinstance(replace, bytes) if replace.lower() == b"only-files": return ONLY_FILES @@ -723,16 +724,21 @@ def get_arg(req: IRequest, argname: str | bytes, default: Optional[T] = None, *, :return: Either bytes or tuple of bytes. 
""" + # Need to import here to prevent circular import: + from ..webish import TahoeLAFSRequest + if isinstance(argname, str): argname_bytes = argname.encode("utf-8") else: argname_bytes = argname - results = [] - if argname_bytes in req.args: + results : list[bytes] = [] + if req.args is not None and argname_bytes in req.args: results.extend(req.args[argname_bytes]) argname_unicode = str(argname_bytes, "utf-8") - if req.fields and argname_unicode in req.fields: + if isinstance(req, TahoeLAFSRequest) and req.fields and argname_unicode in req.fields: + # In all but one or two unit tests, the request will be a + # TahoeLAFSRequest. value = req.fields[argname_unicode].value if isinstance(value, str): value = value.encode("utf-8") diff --git a/src/allmydata/web/operations.py b/src/allmydata/web/operations.py index aedf33f37..a564f8484 100644 --- a/src/allmydata/web/operations.py +++ b/src/allmydata/web/operations.py @@ -43,8 +43,9 @@ DAY = 24*HOUR class OphandleTable(resource.Resource, service.Service): """Renders /operations/%d.""" - - name = "operations" + # The type in Twisted for services is wrong in 22.10... + # https://github.com/twisted/twisted/issues/10135 + name = "operations" # type: ignore[assignment] UNCOLLECTED_HANDLE_LIFETIME = 4*DAY COLLECTED_HANDLE_LIFETIME = 1*DAY diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index 1b2b8192a..ec2582f80 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -242,7 +242,9 @@ class TahoeLAFSSite(Site, object): class WebishServer(service.MultiService): - name = "webish" + # The type in Twisted for services is wrong in 22.10... + # https://github.com/twisted/twisted/issues/10135 + name = "webish" # type: ignore[assignment] def __init__(self, client, webport, tempdir, nodeurl_path=None, staticdir=None, clock=None, now_fn=time.time): diff --git a/tox.ini b/tox.ini index 3e2dacbb2..2f245f2ed 100644 --- a/tox.ini +++ b/tox.ini @@ -10,6 +10,7 @@ python = 3.8: py38-coverage 3.9: py39-coverage 3.10: py310-coverage + 3.11: py311-coverage pypy-3.8: pypy38 pypy-3.9: pypy39 @@ -17,43 +18,39 @@ python = twisted = 1 [tox] -envlist = typechecks,codechecks,py{38,39,310}-{coverage},pypy27,pypy38,pypy39,integration +envlist = typechecks,codechecks,py{38,39,310,311}-{coverage},pypy27,pypy38,pypy39,integration minversion = 2.4 [testenv] passenv = TAHOE_LAFS_* PIP_* SUBUNITREPORTER_* USERPROFILE HOMEDRIVE HOMEPATH -# Get "certifi" to avoid bug #2913. Basically if a `setup_requires=...` causes -# a package to be installed (with setuptools) then it'll fail on certain -# platforms (travis's OX-X 10.12, Slackware 14.2) because PyPI's TLS -# requirements (TLS >= 1.2) are incompatible with the old TLS clients -# available to those systems. Installing it ahead of time (with pip) avoids -# this problem. deps = - # Pin all of these versions for the same reason you ever want to pin - # anything: to prevent new releases with regressions from introducing - # spurious failures into CI runs for whatever development work is - # happening at the time. The versions selected here are just the current - # versions at the time. Bumping them to keep up with future releases is - # fine as long as those releases are known to actually work. - pip==22.0.3 - setuptools==60.9.1 - wheel==0.37.1 - subunitreporter==22.2.0 - # As an exception, we don't pin certifi because it contains CA - # certificates which necessarily change over time. 
Pinning this is - # guaranteed to cause things to break eventually as old certificates - # expire and as new ones are used in the wild that aren't present in - # whatever version we pin. Hopefully there won't be functionality - # regressions in new releases of this package that cause us the kind of - # suffering we're trying to avoid with the above pins. - certifi + # We pull in certify *here* to avoid bug #2913. Basically if a + # `setup_requires=...` causes a package to be installed (with setuptools) + # then it'll fail on certain platforms (travis's OX-X 10.12, Slackware + # 14.2) because PyPI's TLS requirements (TLS >= 1.2) are incompatible with + # the old TLS clients available to those systems. Installing it ahead of + # time (with pip) avoids this problem. + # + # We don't pin an exact version of it because it contains CA certificates + # which necessarily change over time. Pinning this is guaranteed to cause + # things to break eventually as old certificates expire and as new ones + # are used in the wild that aren't present in whatever version we pin. + # Hopefully there won't be functionality regressions in new releases of + # this package that cause us the kind of suffering we're trying to avoid + # with the above pins. + certifi # We add usedevelop=False because testing against a true installation gives # more useful results. usedevelop = False -# We use extras=test to get things like "mock" that are required for our unit -# tests. -extras = test + +extras = + # Get general testing environment dependencies so we can run the tests + # how we like. + testenv + + # And get all of the test suite's actual direct Python dependencies. + test setenv = # Define TEST_SUITE in the environment as an aid to constructing the @@ -98,11 +95,12 @@ commands = [testenv:codechecks] basepython = python3 +skip_install = true deps = - # Newer versions of PyLint have buggy configuration - # (https://github.com/PyCQA/pylint/issues/4574), so stick to old version - # for now. - pylint < 2.5 + # Pin a specific version so we get consistent outcomes; update this + # occasionally: + ruff == 0.0.263 + towncrier # On macOS, git inside of towncrier needs $HOME. passenv = HOME setenv = @@ -110,13 +108,9 @@ setenv = # entire codebase, including various pieces of supporting code. DEFAULT_FILES=src integration static misc setup.py commands = - flake8 {posargs:{env:DEFAULT_FILES}} + ruff check {posargs:{env:DEFAULT_FILES}} python misc/coding_tools/check-umids.py {posargs:{env:DEFAULT_FILES}} python misc/coding_tools/check-debugging.py {posargs:{env:DEFAULT_FILES}} - python misc/coding_tools/find-trailing-spaces.py -r {posargs:{env:DEFAULT_FILES}} - # PyLint has other useful checks, might want to enable them: - # http://pylint.pycqa.org/en/latest/technical_reference/features.html - pylint --disable=all --enable=cell-var-from-loop {posargs:{env:DEFAULT_FILES}} # If towncrier.check fails, you forgot to add a towncrier news # fragment explaining the change in this branch. Create one at @@ -127,20 +121,18 @@ commands = [testenv:typechecks] basepython = python3 -skip_install = True deps = - mypy - mypy-zope + mypy==1.3.0 + # When 0.9.2 comes out it will work with 1.3, it's just unreleased at the moment... + git+https://github.com/shoobx/mypy-zope@f276030 types-mock types-six types-PyYAML types-pkg_resources types-pyOpenSSL - git+https://github.com/warner/foolscap - # Twisted 21.2.0 introduces some type hints which we are not yet - # compatible with. 
- # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3622 - twisted<21.2.0 + foolscap + # Upgrade when new releases come out: + Twisted==22.10.0 commands = mypy src
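
As a quick illustration of the new ``race`` helper added in
src/allmydata/util/deferredutil.py above (a minimal sketch; the Deferred
names are illustrative, not part of the patch):

    from twisted.internet.defer import Deferred
    from allmydata.util.deferredutil import race

    first: Deferred[str] = Deferred()
    second: Deferred[str] = Deferred()
    result = race([first, second])
    second.callback("pong")
    # ``result`` has now fired with (1, "pong") and ``first``, the losing
    # Deferred, has been cancelled.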