From e937b062a1fa8b3597fcefe3514cc81c59952b8e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 15:56:42 -0500 Subject: [PATCH 01/93] news fragment --- newsfragments/3385.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3385.minor diff --git a/newsfragments/3385.minor b/newsfragments/3385.minor new file mode 100644 index 000000000..e69de29bb From 13bcd8170baf586b9b6a59273b6e924f312a6f0d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 15:57:10 -0500 Subject: [PATCH 02/93] Turn on Coveralls on GitHub actions --- .github/workflows/ci.yml | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd5049104..d401a1f24 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,11 +59,41 @@ jobs: name: eliot.log path: eliot.log - - name: Upload coverage report - uses: codecov/codecov-action@v1 + # Upload this job's coverage data to Coveralls. + - name: "Report Coverage to Coveralls" + uses: "coverallsapp/github-action@v1.1.2" with: - token: abf679b6-e2e6-4b33-b7b5-6cfbd41ee691 - file: coverage.xml + github-token: "${{ secrets.github_token }}" + # Every source of coverage reports needs a unique "flag name". + # Construct one by smashing a few variables from the matrix together + # here. + flag-name: "run-${{ matrix.os }}-${{ matrix.python-version }}" + # Mark the data as just one piece of many because we have more than + # one instance of this job (Windows, macOS) which collects and + # reports coverage. This is necessary to cause Coveralls to merge + # multiple coverage results into a single report. + parallel: true + + # Tell Coveralls that we're done reporting coverage data. Since we're using + # the "parallel" mode where more than one coverage data file is merged into + # a single report, we have to tell Coveralls when we've uploaded all of the + # data files. This does it. We make sure it runs last by making it depend + # on *all* of the coverage-collecting jobs. + finish-coverage-report: + # There happens to just be one coverage-collecting job at the moment. If + # the coverage reports are broken and someone added more + # coverage-collecting jobs to this workflow but didn't update this, that's + # why. + needs: + - "coverage" + runs-on: "ubuntu-latest" + steps: + - name: "Finish Coveralls Reporting" + uses: "coverallsapp/github-action@v1.1.2" + with: + github-token: "${{ secrets.github_token }}" + # Here's the magic that tells Coveralls we're done. + parallel-finished: true integration: runs-on: ${{ matrix.os }} From b4128a8d10eac897794b517e867e34c11e4c551c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 15:58:21 -0500 Subject: [PATCH 03/93] Stop collecting coverage on CircleCI --- .circleci/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index afa3fafa1..8a1452714 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -210,7 +210,7 @@ jobs: # filenames and argv). LANG: "en_US.UTF-8" # Select a tox environment to run for this job. - TAHOE_LAFS_TOX_ENVIRONMENT: "py27-coverage" + TAHOE_LAFS_TOX_ENVIRONMENT: "py27" # Additional arguments to pass to tox. TAHOE_LAFS_TOX_ARGS: "" # The path in which test artifacts will be placed. @@ -220,7 +220,7 @@ jobs: WHEELHOUSE_PATH: &WHEELHOUSE_PATH "/tmp/wheelhouse" PIP_FIND_LINKS: "file:///tmp/wheelhouse" # Upload the coverage report. - UPLOAD_COVERAGE: "yes" + UPLOAD_COVERAGE: "" # pip cannot install packages if the working directory is not readable. # We want to run a lot of steps as nobody instead of as root. @@ -373,7 +373,7 @@ jobs: # this reporter on Python 3. So drop that and just specify the # reporter. TAHOE_LAFS_TRIAL_ARGS: "--reporter=subunitv2-file" - TAHOE_LAFS_TOX_ENVIRONMENT: "py36-coverage" + TAHOE_LAFS_TOX_ENVIRONMENT: "py36" ubuntu-20-04: From ca8f7d73f2ab29a0236432aaa024b94bf8768fe4 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 15:59:03 -0500 Subject: [PATCH 04/93] Stop using codecov --- .codecov.yml | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) delete mode 100644 .codecov.yml diff --git a/.codecov.yml b/.codecov.yml deleted file mode 100644 index 166190c5e..000000000 --- a/.codecov.yml +++ /dev/null @@ -1,48 +0,0 @@ -# Override defaults for codecov.io checks. -# -# Documentation is at https://docs.codecov.io/docs/codecov-yaml; -# reference is at https://docs.codecov.io/docs/codecovyml-reference. -# -# To validate this file, use: -# -# curl --data-binary @.codecov.yml https://codecov.io/validate -# -# Codecov's defaults seem to leave red marks in GitHub CI checks in a -# rather arbitrary manner, probably because of non-determinism in -# coverage (see https://tahoe-lafs.org/trac/tahoe-lafs/ticket/2891) -# and maybe because computers are bad with floating point numbers. - -# Allow coverage percentage a precision of zero decimals, and round to -# the nearest number (for example, 89.957 to to 90; 89.497 to 89%). -# Coverage above 90% is good, below 80% is bad. -coverage: - round: nearest - range: 80..90 - precision: 0 - - # Aim for a target test coverage of 90% in codecov/project check (do - # not allow project coverage to drop below that), and allow - # codecov/patch a threshold of 1% (allow coverage in changes to drop - # by that much, and no less). That should be good enough for us. - status: - project: - default: - target: 90% - threshold: 1% - patch: - default: - threshold: 1% - - -codecov: - # This is a public repository so supposedly we don't "need" to use an upload - # token. However, using one makes sure that CI jobs running against forked - # repositories have coverage uploaded to the right place in codecov so - # their reports aren't incomplete. - token: "abf679b6-e2e6-4b33-b7b5-6cfbd41ee691" - - notify: - # The reference documentation suggests that this is the default setting: - # https://docs.codecov.io/docs/codecovyml-reference#codecovnotifywait_for_ci - # However observation suggests otherwise. - wait_for_ci: true From 3fb412eda1c71db389c224b2f395cf57c5974500 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 16:20:41 -0500 Subject: [PATCH 05/93] Perhaps this is the correct github-token to use. The coveralls docs might be wrong. --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d401a1f24..3cc59124d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" uses: "coverallsapp/github-action@v1.1.2" with: - github-token: "${{ secrets.github_token }}" + github-token: "${{ secrets.GITHUB_TOKEN }}" # Every source of coverage reports needs a unique "flag name". # Construct one by smashing a few variables from the matrix together # here. @@ -91,7 +91,7 @@ jobs: - name: "Finish Coveralls Reporting" uses: "coverallsapp/github-action@v1.1.2" with: - github-token: "${{ secrets.github_token }}" + github-token: "${{ secrets.GITHUB_TOKEN }}" # Here's the magic that tells Coveralls we're done. parallel-finished: true From bebcca39f62c872107c27ae637bef9ed6c19ecaa Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 20:09:46 -0500 Subject: [PATCH 06/93] Switch to coveralls-python, maybe it works better --- .github/workflows/ci.yml | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3cc59124d..ec640fa88 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,18 +61,23 @@ jobs: # Upload this job's coverage data to Coveralls. - name: "Report Coverage to Coveralls" - uses: "coverallsapp/github-action@v1.1.2" - with: - github-token: "${{ secrets.GITHUB_TOKEN }}" + run: | + pip install coveralls + python -m coveralls + env: + # Some magic value required for some magic reason. + GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" + # Help coveralls identify our project. + COVERALLS_REPO_TOKEN: "JPf16rLB7T2yjgATIxFzTsEgMdN1UNq6o" # Every source of coverage reports needs a unique "flag name". # Construct one by smashing a few variables from the matrix together # here. - flag-name: "run-${{ matrix.os }}-${{ matrix.python-version }}" + COVERALLS_FLAG_NAME: "run-${{ matrix.os }}-${{ matrix.python-version }}" # Mark the data as just one piece of many because we have more than # one instance of this job (Windows, macOS) which collects and # reports coverage. This is necessary to cause Coveralls to merge # multiple coverage results into a single report. - parallel: true + COVERALLS_PARALLEL: true # Tell Coveralls that we're done reporting coverage data. Since we're using # the "parallel" mode where more than one coverage data file is merged into @@ -89,11 +94,14 @@ jobs: runs-on: "ubuntu-latest" steps: - name: "Finish Coveralls Reporting" - uses: "coverallsapp/github-action@v1.1.2" - with: - github-token: "${{ secrets.GITHUB_TOKEN }}" - # Here's the magic that tells Coveralls we're done. - parallel-finished: true + run: | + pip install coveralls + python -m coveralls --finish + env: + # Some magic value required for some magic reason. + GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" + # Help coveralls identify our project. + COVERALLS_REPO_TOKEN: "JPf16rLB7T2yjgATIxFzTsEgMdN1UNq6o" integration: runs-on: ${{ matrix.os }} From 9a8a61b74042a8870e83e4700a177627e02a528a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 20:31:32 -0500 Subject: [PATCH 07/93] Further tweaks to help the last step --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec640fa88..b89654c0d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,10 +92,14 @@ jobs: needs: - "coverage" runs-on: "ubuntu-latest" + # Get a Python 3 environment because only the Python 3 release of + # coveralls-python has the `--finish` flag... + container: "python:3-slim" steps: - name: "Finish Coveralls Reporting" run: | - pip install coveralls + # Also install wheel otherwise `docopt` may fail to build + pip install wheel coveralls python -m coveralls --finish env: # Some magic value required for some magic reason. From f3aca51e35cbc04571f76347df64d2feafd80718 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 20:52:02 -0500 Subject: [PATCH 08/93] run in debug mode to collect info for bug report --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b89654c0d..810b607af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,7 +100,7 @@ jobs: run: | # Also install wheel otherwise `docopt` may fail to build pip install wheel coveralls - python -m coveralls --finish + python -m coveralls debug --finish env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From 4b65751f5165b2c10febfee9848774a2b8aa591a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 09:00:16 -0500 Subject: [PATCH 09/93] Debug the earlier submissions to see what build_num they're providing --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 810b607af..1d244b4ce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" run: | pip install coveralls - python -m coveralls + python -m coveralls debug env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From adf3518fc1d768de9f6aec58e9fe8ddf52f80947 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 14:32:48 -0500 Subject: [PATCH 10/93] Okay I guess that served its purpose --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1d244b4ce..810b607af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" run: | pip install coveralls - python -m coveralls debug + python -m coveralls env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From f24cc5da0c0ac7b631335de9dbb3634b6d48d402 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 14:33:00 -0500 Subject: [PATCH 11/93] The angrier I am the more words I write --- .github/workflows/ci.yml | 77 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 810b607af..74454de6c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,15 +92,80 @@ jobs: needs: - "coverage" runs-on: "ubuntu-latest" - # Get a Python 3 environment because only the Python 3 release of - # coveralls-python has the `--finish` flag... - container: "python:3-slim" + # Get a runtime environment with curl so we can use it to send the HTTP + # request. + container: "curlimages/curl:7.74.0" steps: - name: "Finish Coveralls Reporting" run: | - # Also install wheel otherwise `docopt` may fail to build - pip install wheel coveralls - python -m coveralls debug --finish + # coveralls-python does have a `--finish` option but it doesn't seem + # to work, at least for us. + # https://github.com/coveralls-clients/coveralls-python/issues/248 + # + # But all it does is this simple POST so we can just send it + # ourselves. The only hard part is guessing what the POST + # parameters mean. + # + # Since the build is done I'm going to guess that "done" is a fine + # value for status. + # + # That leaves "build_num". The coveralls documentation gives some + # hints about it. It suggests using $CIRCLE_WORKFLOW_ID if your job + # is on CircleCI. CircleCI documentation says this about + # CIRCLE_WORKFLOW_ID: + # + # A unique identifier for the workflow instance of the current + # job. This identifier is the same for every job in a given + # workflow instance. + # + # (from https://circleci.com/docs/2.0/env-vars/) + # + # A CircleCI workflow is roughly "the group of jobs run for a + # particular commit". There are exceptions to this but maybe we can + # ignore them. + # + # The only over hints we get from Coveralls about "build_num" are: + # + # * An example value of `1234` + # + # * Another example value of `$BUILD_NUMBER` where BUILD_NUMBER is + # not defined anywhere. + # + # Starting from the CircleCI workflow example, then, and looking at + # the environment variables GitHub Actions offers + # (https://docs.github.com/en/free-pro-team@latest/actions/reference/environment-variables#default-environment-variables) there are two of interest: + # + # * GITHUB_RUN_ID - A unique number for each run within a + # repository. This number does not change if you re-run the + # workflow run. + # + # * GITHUB_RUN_NUMBER - A unique number for each run of a particular + # workflow in a repository. This number begins at 1 for the + # workflow's first run, and increments with each new run. This + # number does not change if you re-run the workflow run. + # + # These seem to offer approximately the same value and only differ + # on whether they will be unique for the project as a whole or just + # for this particular workflow ("ci", as defined by this file). + # + # Unfortunately neither of them changes if a workflow is re-run. + # This differs from the behavior of CircleCI's CIRCLE_WORKFLOW_ID + # where a new value is assigned if a workflow is re-run. + # + # The consequence of this would seem to be that multiple runs of a + # GitHub Actions workflow will have their coverage reported to the + # same job. And since we eventually "finish" a job, later runs + # would be discarded (I suppose). + # + # There doesn't seem to be a way to do any better, though. + # + # However, we have the further constraint that our build_num must + # agree with whatever python-coveralls has selected. An inspection + # of the python-coveralls source reveals (perhaps unsurprisingly) + # its authors have selected GITHUB_RUN_ID. + # + # Thus, we select the same. + curl -k https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN -d "payload[build_num]=GITHUB_RUN_ID&payload[status]=done" env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From 8e8a7d82c642b91008d61ab9496465fb384d94ff Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 14:33:31 -0500 Subject: [PATCH 12/93] it was one or the other. turns out it was the other. --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 74454de6c..f15610333 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -160,8 +160,8 @@ jobs: # There doesn't seem to be a way to do any better, though. # # However, we have the further constraint that our build_num must - # agree with whatever python-coveralls has selected. An inspection - # of the python-coveralls source reveals (perhaps unsurprisingly) + # agree with whatever coveralls-python has selected. An inspection + # of the coveralls-python source reveals (perhaps unsurprisingly) # its authors have selected GITHUB_RUN_ID. # # Thus, we select the same. From 4d2782c1787838416df0963230aefde3371af631 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 14:57:31 -0500 Subject: [PATCH 13/93] Hahaha. Resolve the variable, don't just include its name. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f15610333..ff4a89ecd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -165,7 +165,7 @@ jobs: # its authors have selected GITHUB_RUN_ID. # # Thus, we select the same. - curl -k https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN -d "payload[build_num]=GITHUB_RUN_ID&payload[status]=done" + curl -k https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN -d "payload[build_num]=$GITHUB_RUN_ID&payload[status]=done" env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From 24a531474df7e3af3c21d37a287853832326bc7d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 15:38:12 -0500 Subject: [PATCH 14/93] So much for my ability to read and understand a Python program coveralls complained: {"error":"No build matching CI build number 467026020 found"} So try constructing a build_num that looks like the value we observed from `coveralls` output when it was submitting coverage data. --- .github/workflows/ci.yml | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ff4a89ecd..e9de9ba86 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,9 +92,6 @@ jobs: needs: - "coverage" runs-on: "ubuntu-latest" - # Get a runtime environment with curl so we can use it to send the HTTP - # request. - container: "curlimages/curl:7.74.0" steps: - name: "Finish Coveralls Reporting" run: | @@ -161,11 +158,25 @@ jobs: # # However, we have the further constraint that our build_num must # agree with whatever coveralls-python has selected. An inspection - # of the coveralls-python source reveals (perhaps unsurprisingly) - # its authors have selected GITHUB_RUN_ID. + # of the coveralls-python source suggests that GITHUB_RUN_ID is + # used. However, observation of the coveralls.io web interface + # suggests the value instead is something more like: + # + # $(git rev-parse refs/remotes/pull//merge)-PR- # # Thus, we select the same. - curl -k https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN -d "payload[build_num]=$GITHUB_RUN_ID&payload[status]=done" + # + # GITHUB_REF is a string like the rev being parsed above. We + # extract the PR number from it. + PR=$(echo $GITHUB_REF | cut -d / -f 4) + REV=$(git rev-parse $GITHUB_REF) + BUILD_NUM=$REV-PR-$PR + REPO_NAME=$GITHUB_REPOSITORY + curl \ + -k \ + https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN \ + -d \ + "payload[build_num]=$BUILD_NUM&payload[status]=done&payload[repo_name]=$REPO_NAME" env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From 52c42b5118100982ae777c82607de4e51c588631 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 15:48:20 -0500 Subject: [PATCH 15/93] dump this info, who knows how many more rounds this will take --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e9de9ba86..2b213493e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" run: | pip install coveralls - python -m coveralls + python -m coveralls --verbose env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From fa863c94782f4011ab8d2c74051a6f0e3d3141d5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 15:48:31 -0500 Subject: [PATCH 16/93] speed up the test cycle --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index c61331885..a1b988b91 100644 --- a/tox.ini +++ b/tox.ini @@ -50,7 +50,7 @@ extras = test setenv = # Define TEST_SUITE in the environment as an aid to constructing the # correct test command below. - !py36: TEST_SUITE = allmydata + !py36: TEST_SUITE = allmydata.test.test_abbreviate py36: TEST_SUITE = allmydata.test.python3_tests commands = From fac12210cb76e5cc3600ed6016ce960eab42095c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 15:58:48 -0500 Subject: [PATCH 17/93] Can't do that Git stuff without a checkout Maybe *that* is why `coveralls --finish` fails? --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2b213493e..243d865e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -93,6 +93,9 @@ jobs: - "coverage" runs-on: "ubuntu-latest" steps: + - name: "Check out Tahoe-LAFS sources" + uses: "actions/checkout@v2" + - name: "Finish Coveralls Reporting" run: | # coveralls-python does have a `--finish` option but it doesn't seem From 89c54af01db145996d680b8ade82370e3564c7b1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:05:35 -0500 Subject: [PATCH 18/93] Guess we need the rest of the repo too, surprise. --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 243d865e4..473cc7d53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -96,6 +96,10 @@ jobs: - name: "Check out Tahoe-LAFS sources" uses: "actions/checkout@v2" + - name: "Fetch all history for all tags and branches" + run: | + git fetch --prune --unshallow + - name: "Finish Coveralls Reporting" run: | # coveralls-python does have a `--finish` option but it doesn't seem From d515887ba1b1b0a6af9efb575883ba9e46f12201 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:15:02 -0500 Subject: [PATCH 19/93] This is probably faster and may actually work `git fetch --prune --unshallow` doesn't seem to get refs/remotes/pull//merge but that's okay because HEAD is already set to that --- .github/workflows/ci.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 473cc7d53..633dd85e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -96,10 +96,6 @@ jobs: - name: "Check out Tahoe-LAFS sources" uses: "actions/checkout@v2" - - name: "Fetch all history for all tags and branches" - run: | - git fetch --prune --unshallow - - name: "Finish Coveralls Reporting" run: | # coveralls-python does have a `--finish` option but it doesn't seem @@ -175,8 +171,11 @@ jobs: # # GITHUB_REF is a string like the rev being parsed above. We # extract the PR number from it. + # + # actions/checkout@v2 makes HEAD the same as refs/remotes/pull//merge so we can just rev-parse that. PR=$(echo $GITHUB_REF | cut -d / -f 4) - REV=$(git rev-parse $GITHUB_REF) + REV=$(git rev-parse HEAD) BUILD_NUM=$REV-PR-$PR REPO_NAME=$GITHUB_REPOSITORY curl \ From 59e385c00f5a85247959e922f20f20438163798f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:24:53 -0500 Subject: [PATCH 20/93] apparently it doesn't have `remotes` in there --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 633dd85e3..7b46acbf3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -165,16 +165,16 @@ jobs: # used. However, observation of the coveralls.io web interface # suggests the value instead is something more like: # - # $(git rev-parse refs/remotes/pull//merge)-PR- + # $(git rev-parse refs/pull//merge)-PR- # # Thus, we select the same. # # GITHUB_REF is a string like the rev being parsed above. We # extract the PR number from it. # - # actions/checkout@v2 makes HEAD the same as refs/remotes/pull//merge so we can just rev-parse that. - PR=$(echo $GITHUB_REF | cut -d / -f 4) + PR=$(echo $GITHUB_REF | cut -d / -f 3) REV=$(git rev-parse HEAD) BUILD_NUM=$REV-PR-$PR REPO_NAME=$GITHUB_REPOSITORY From e3a6f43dc9cf51b9c917d8c585735806b839bc73 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:43:11 -0500 Subject: [PATCH 21/93] less shell wankery --- .github/workflows/ci.yml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b46acbf3..0451c3bd1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -168,16 +168,19 @@ jobs: # $(git rev-parse refs/pull//merge)-PR- # # Thus, we select the same. - # - # GITHUB_REF is a string like the rev being parsed above. We - # extract the PR number from it. - # - # actions/checkout@v2 makes HEAD the same as refs/pull//merge so we can just rev-parse that. - PR=$(echo $GITHUB_REF | cut -d / -f 3) + + # refs/pull//merge was justed checked out by so we can just + # rev-parse HEAD to find the revision. REV=$(git rev-parse HEAD) + + # We can get the PR number from the "context". + # https://github.community/t/github-ref-is-inconsistent/17728/3 + # https://docs.github.com/en/free-pro-team@latest/articles/events-that-trigger-workflows + PR=${{ github.event.number }} + BUILD_NUM=$REV-PR-$PR REPO_NAME=$GITHUB_REPOSITORY + curl \ -k \ https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN \ From 3855fe0a6d983123dc085eb7d705ca25ce9b8c88 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:43:36 -0500 Subject: [PATCH 22/93] reinstate full test suite --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index a1b988b91..c61331885 100644 --- a/tox.ini +++ b/tox.ini @@ -50,7 +50,7 @@ extras = test setenv = # Define TEST_SUITE in the environment as an aid to constructing the # correct test command below. - !py36: TEST_SUITE = allmydata.test.test_abbreviate + !py36: TEST_SUITE = allmydata py36: TEST_SUITE = allmydata.test.python3_tests commands = From 35614d13828427ad65f3ae9686241f75e7bbb423 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 17:46:40 -0500 Subject: [PATCH 23/93] Remove the codecov badge --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 98150ed27..f12a41945 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ Free and Open decentralized data store `Tahoe-LAFS `__ (Tahoe Least-Authority File Store) is the first free software / open-source storage technology that distributes your data across multiple servers. Even if some servers fail or are taken over by an attacker, the entire file store continues to function correctly, preserving your privacy and security. -|Contributor Covenant| |readthedocs| |travis| |circleci| |codecov| +|Contributor Covenant| |readthedocs| |travis| |circleci| Table of contents From e9a7838d882a96128ec64ea7b93eecce4aa93b54 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 17:46:50 -0500 Subject: [PATCH 24/93] Remove the codecov anchor --- README.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.rst b/README.rst index f12a41945..ef094456c 100644 --- a/README.rst +++ b/README.rst @@ -125,9 +125,6 @@ See `TGPPL.PDF `__ for why the TGPPL ex .. |circleci| image:: https://circleci.com/gh/tahoe-lafs/tahoe-lafs.svg?style=svg :target: https://circleci.com/gh/tahoe-lafs/tahoe-lafs -.. |codecov| image:: https://codecov.io/github/tahoe-lafs/tahoe-lafs/coverage.svg?branch=master - :alt: test coverage percentage - :target: https://codecov.io/github/tahoe-lafs/tahoe-lafs?branch=master .. |Contributor Covenant| image:: https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg :alt: code of conduct From b06730b9afb6a67465ace0515ac3c4d5bceb7864 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 17:46:58 -0500 Subject: [PATCH 25/93] Add the coveralls.io badge --- README.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index ef094456c..b1f6d2563 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ Free and Open decentralized data store `Tahoe-LAFS `__ (Tahoe Least-Authority File Store) is the first free software / open-source storage technology that distributes your data across multiple servers. Even if some servers fail or are taken over by an attacker, the entire file store continues to function correctly, preserving your privacy and security. -|Contributor Covenant| |readthedocs| |travis| |circleci| +|Contributor Covenant| |readthedocs| |travis| |circleci| |coveralls| Table of contents @@ -125,6 +125,9 @@ See `TGPPL.PDF `__ for why the TGPPL ex .. |circleci| image:: https://circleci.com/gh/tahoe-lafs/tahoe-lafs.svg?style=svg :target: https://circleci.com/gh/tahoe-lafs/tahoe-lafs +.. |coveralls| image:: https://coveralls.io/repos/github/tahoe-lafs/tahoe-lafs/badge.svg + :alt: code coverage + :target: https://coveralls.io/github/tahoe-lafs/tahoe-lafs .. |Contributor Covenant| image:: https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg :alt: code of conduct From f5ba293f79de8672ca1e625da1ee942b79a2ef0b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 20:27:17 -0500 Subject: [PATCH 26/93] Ideally this is no longer necessary --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0451c3bd1..6d21cd247 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" run: | pip install coveralls - python -m coveralls --verbose + python -m coveralls env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From e382ef8a893b1984d3f1fc1033f36b92fb55da4a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 20:27:41 -0500 Subject: [PATCH 27/93] Clean up the explanation, link to some more/better stuff --- .github/workflows/ci.yml | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d21cd247..2aaf2f6c0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -162,20 +162,36 @@ jobs: # However, we have the further constraint that our build_num must # agree with whatever coveralls-python has selected. An inspection # of the coveralls-python source suggests that GITHUB_RUN_ID is - # used. However, observation of the coveralls.io web interface - # suggests the value instead is something more like: + # used: # - # $(git rev-parse refs/pull//merge)-PR- + # * https://github.com/coveralls-clients/coveralls-python/blob/a9b36299ce9ba3bb6858700781881029d82e545d/coveralls/api.py#L105-L109 + # * https://github.com/coveralls-clients/coveralls-python/blob/a9b36299ce9ba3bb6858700781881029d82e545d/coveralls/api.py#L54-L59 # - # Thus, we select the same. + # However, observation of the coveralls.io web interface, logs from + # the coveralls command in action, and experimentation suggests the + # value instead is something more like: + # + # -PR- + # + # For PRs it is the merge commit (`refs/pull//merge`). For + # branches, the tip. - # refs/pull//merge was justed checked out by so we can just - # rev-parse HEAD to find the revision. + # For pull requests, refs/pull//merge was just checked out + # by so HEAD will refer to the right revision. For branches, HEAD + # is also the tip of the branch. REV=$(git rev-parse HEAD) # We can get the PR number from the "context". - # https://github.community/t/github-ref-is-inconsistent/17728/3 - # https://docs.github.com/en/free-pro-team@latest/articles/events-that-trigger-workflows + # + # https://docs.github.com/en/free-pro-team@latest/developers/webhooks-and-events/webhook-events-and-payloads#pull_request + # + # (via ). + # + # If this is a pull request, `github.event` is a `pull_request` + # structure which has `number` right in it. + # + # If this is a push, `github.event` is a `push` instead and ... XXX ??? + PR=${{ github.event.number }} BUILD_NUM=$REV-PR-$PR From 3b8df95e3e19d4b3aaa30799ff6e630fb10fae7f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 20:52:12 -0500 Subject: [PATCH 28/93] Try constructing build_num differently for push --- .github/workflows/ci.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2aaf2f6c0..701b6815c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -190,11 +190,16 @@ jobs: # If this is a pull request, `github.event` is a `pull_request` # structure which has `number` right in it. # - # If this is a push, `github.event` is a `push` instead and ... XXX ??? + # If this is a push, `github.event` is a `push` instead but we only + # need the revision to construct the build_num. PR=${{ github.event.number }} - BUILD_NUM=$REV-PR-$PR + if [ "${PR}" = "" ]; then + BUILD_NUM=$REV + else + BUILD_NUM=$REV-PR-$PR + fi REPO_NAME=$GITHUB_REPOSITORY curl \ From 709823e5629140299d2f1fb191dbdfdbd3583dd7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 7 Jan 2021 18:24:57 -0500 Subject: [PATCH 29/93] most of those words proved irrelevant --- .github/workflows/ci.yml | 66 ++++------------------------------------ 1 file changed, 6 insertions(+), 60 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 701b6815c..5c7b21151 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -104,7 +104,7 @@ jobs: # # But all it does is this simple POST so we can just send it # ourselves. The only hard part is guessing what the POST - # parameters mean. + # parameters mean. And I've done that for you already. # # Since the build is done I'm going to guess that "done" is a fine # value for status. @@ -114,67 +114,13 @@ jobs: # is on CircleCI. CircleCI documentation says this about # CIRCLE_WORKFLOW_ID: # - # A unique identifier for the workflow instance of the current - # job. This identifier is the same for every job in a given - # workflow instance. + # Observation of the coveralls.io web interface, logs from the + # coveralls command in action, and experimentation suggests the + # value for PRs is something more like: # - # (from https://circleci.com/docs/2.0/env-vars/) + # -PR- # - # A CircleCI workflow is roughly "the group of jobs run for a - # particular commit". There are exceptions to this but maybe we can - # ignore them. - # - # The only over hints we get from Coveralls about "build_num" are: - # - # * An example value of `1234` - # - # * Another example value of `$BUILD_NUMBER` where BUILD_NUMBER is - # not defined anywhere. - # - # Starting from the CircleCI workflow example, then, and looking at - # the environment variables GitHub Actions offers - # (https://docs.github.com/en/free-pro-team@latest/actions/reference/environment-variables#default-environment-variables) there are two of interest: - # - # * GITHUB_RUN_ID - A unique number for each run within a - # repository. This number does not change if you re-run the - # workflow run. - # - # * GITHUB_RUN_NUMBER - A unique number for each run of a particular - # workflow in a repository. This number begins at 1 for the - # workflow's first run, and increments with each new run. This - # number does not change if you re-run the workflow run. - # - # These seem to offer approximately the same value and only differ - # on whether they will be unique for the project as a whole or just - # for this particular workflow ("ci", as defined by this file). - # - # Unfortunately neither of them changes if a workflow is re-run. - # This differs from the behavior of CircleCI's CIRCLE_WORKFLOW_ID - # where a new value is assigned if a workflow is re-run. - # - # The consequence of this would seem to be that multiple runs of a - # GitHub Actions workflow will have their coverage reported to the - # same job. And since we eventually "finish" a job, later runs - # would be discarded (I suppose). - # - # There doesn't seem to be a way to do any better, though. - # - # However, we have the further constraint that our build_num must - # agree with whatever coveralls-python has selected. An inspection - # of the coveralls-python source suggests that GITHUB_RUN_ID is - # used: - # - # * https://github.com/coveralls-clients/coveralls-python/blob/a9b36299ce9ba3bb6858700781881029d82e545d/coveralls/api.py#L105-L109 - # * https://github.com/coveralls-clients/coveralls-python/blob/a9b36299ce9ba3bb6858700781881029d82e545d/coveralls/api.py#L54-L59 - # - # However, observation of the coveralls.io web interface, logs from - # the coveralls command in action, and experimentation suggests the - # value instead is something more like: - # - # -PR- - # - # For PRs it is the merge commit (`refs/pull//merge`). For - # branches, the tip. + # For branches, it's just the git branch tip hash. # For pull requests, refs/pull//merge was just checked out # by so HEAD will refer to the right revision. For branches, HEAD From 5614c4c3f4a80699dc3700f72068b5d3a6464ed3 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 8 Jan 2021 08:27:00 -0500 Subject: [PATCH 30/93] improve this comment marginally just looking for an excuse to trigger another build and see if inviting "coveralls" to be a collaborator on LeastAuthority/tahoe-lafs fixes the status reporting issue. --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5c7b21151..f690d11ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -76,7 +76,9 @@ jobs: # Mark the data as just one piece of many because we have more than # one instance of this job (Windows, macOS) which collects and # reports coverage. This is necessary to cause Coveralls to merge - # multiple coverage results into a single report. + # multiple coverage results into a single report. Note the merge + # only happens when we "finish" a particular build, as identified by + # its "build_num" (aka "service_number"). COVERALLS_PARALLEL: true # Tell Coveralls that we're done reporting coverage data. Since we're using From e72c93a9826f7950cedfcdffaf2c631cd7b0a7b6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 8 Jan 2021 08:52:38 -0500 Subject: [PATCH 31/93] explain why we're not using the github action here --- .github/workflows/ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f690d11ea..4f63ed19a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,10 @@ jobs: name: eliot.log path: eliot.log - # Upload this job's coverage data to Coveralls. + # Upload this job's coverage data to Coveralls. While there is a GitHub + # Action for this, as of Jan 2021 it does not support Python coverage + # files - only lcov files. Therefore, we use coveralls-python, the + # coveralls.io-supplied Python reporter, for this. - name: "Report Coverage to Coveralls" run: | pip install coveralls From 73110f48da854b00ea2c3cb25c15152ed584ab04 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 14:56:46 -0500 Subject: [PATCH 32/93] Banish getProcessOutputAndValue from test_runner It cannot do the right thing on Windows for non-ASCII because Twisted uses pywin32 and on Python 2 pywin32 binds CreateProcessA. --- src/allmydata/test/test_runner.py | 147 +++++++++++++----------------- src/allmydata/test/test_system.py | 22 ++++- 2 files changed, 85 insertions(+), 84 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index ef2b99a19..64afca939 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -5,6 +5,14 @@ from __future__ import ( import os.path, re, sys from os import linesep +from subprocess import ( + PIPE, + Popen, +) + +from eliot import ( + log_call, +) from twisted.trial import unittest @@ -19,7 +27,6 @@ from twisted.python.runtime import ( platform, ) from allmydata.util import fileutil, pollmixin -from allmydata.util.encodingutil import unicode_to_argv, unicode_to_output from allmydata.test import common_util import allmydata from .common_util import parse_cli, run_cli @@ -29,12 +36,8 @@ from .cli_node_api import ( on_stdout, on_stdout_and_stderr, ) -from ._twisted_9607 import ( - getProcessOutputAndValue, -) from ..util.eliotutil import ( inline_callbacks, - log_call_deferred, ) def get_root_from_file(src): @@ -54,93 +57,72 @@ srcfile = allmydata.__file__ rootdir = get_root_from_file(srcfile) -class RunBinTahoeMixin(object): - @log_call_deferred(action_type="run-bin-tahoe") - def run_bintahoe(self, args, stdin=None, python_options=[], env=None): - command = sys.executable - argv = python_options + ["-m", "allmydata.scripts.runner"] + args +@log_call(action_type="run-bin-tahoe") +def run_bintahoe(extra_argv): + """ + Run the main Tahoe entrypoint in a child process with the given additional + arguments. - if env is None: - env = os.environ + :param [unicode] extra_argv: More arguments for the child process argv. - d = getProcessOutputAndValue(command, argv, env, stdinBytes=stdin) - def fix_signal(result): - # Mirror subprocess.Popen.returncode structure - (out, err, signal) = result - return (out, err, -signal) - d.addErrback(fix_signal) - return d + :return: A three-tuple of stdout (unicode), stderr (unicode), and the + child process "returncode" (int). + """ + argv = [sys.executable, u"-m", u"allmydata.scripts.runner"] + extra_argv + p = Popen(argv, stdout=PIPE, stderr=PIPE) + out = p.stdout.read().decode("utf-8") + err = p.stderr.read().decode("utf-8") + returncode = p.wait() + return (out, err, returncode) -class BinTahoe(common_util.SignalMixin, unittest.TestCase, RunBinTahoeMixin): +class BinTahoe(common_util.SignalMixin, unittest.TestCase): def test_unicode_arguments_and_output(self): + """ + The runner script receives unmangled non-ASCII values in argv. + """ tricky = u"\u2621" - try: - tricky_arg = unicode_to_argv(tricky, mangle=True) - tricky_out = unicode_to_output(tricky) - except UnicodeEncodeError: - raise unittest.SkipTest("A non-ASCII argument/output could not be encoded on this platform.") + out, err, returncode = run_bintahoe([tricky]) + self.assertEqual(returncode, 1) + self.assertIn(u"Unknown command: " + tricky, out) - d = self.run_bintahoe([tricky_arg]) - def _cb(res): - out, err, rc_or_sig = res - self.failUnlessEqual(rc_or_sig, 1, str(res)) - self.failUnlessIn("Unknown command: "+tricky_out, out) - d.addCallback(_cb) - return d - - def test_run_with_python_options(self): - # -t is a harmless option that warns about tabs. - d = self.run_bintahoe(["--version"], python_options=["-t"]) - def _cb(res): - out, err, rc_or_sig = res - self.assertEqual(rc_or_sig, 0, str(res)) - self.assertTrue(out.startswith(allmydata.__appname__ + '/'), str(res)) - d.addCallback(_cb) - return d - - @inlineCallbacks def test_help_eliot_destinations(self): - out, err, rc_or_sig = yield self.run_bintahoe(["--help-eliot-destinations"]) - self.assertIn("\tfile:", out) - self.assertEqual(rc_or_sig, 0) + out, err, returncode = run_bintahoe([u"--help-eliot-destinations"]) + self.assertIn(u"\tfile:", out) + self.assertEqual(returncode, 0) - @inlineCallbacks def test_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ + out, err, returncode = run_bintahoe([ # Proves little but maybe more than nothing. - "--eliot-destination=file:-", + u"--eliot-destination=file:-", # Throw in *some* command or the process exits with error, making # it difficult for us to see if the previous arg was accepted or # not. - "--help", + u"--help", ]) - self.assertEqual(rc_or_sig, 0) + self.assertEqual(returncode, 0) - @inlineCallbacks def test_unknown_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ - "--eliot-destination=invalid:more", + out, err, returncode = run_bintahoe([ + u"--eliot-destination=invalid:more", ]) - self.assertEqual(1, rc_or_sig) - self.assertIn("Unknown destination description", out) - self.assertIn("invalid:more", out) + self.assertEqual(1, returncode) + self.assertIn(u"Unknown destination description", out) + self.assertIn(u"invalid:more", out) - @inlineCallbacks def test_malformed_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ - "--eliot-destination=invalid", + out, err, returncode = run_bintahoe([ + u"--eliot-destination=invalid", ]) - self.assertEqual(1, rc_or_sig) - self.assertIn("must be formatted like", out) + self.assertEqual(1, returncode) + self.assertIn(u"must be formatted like", out) - @inlineCallbacks def test_escape_in_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ - "--eliot-destination=file:@foo", + out, err, returncode = run_bintahoe([ + u"--eliot-destination=file:@foo", ]) - self.assertEqual(1, rc_or_sig) - self.assertIn("Unsupported escape character", out) + self.assertEqual(1, returncode) + self.assertIn(u"Unsupported escape character", out) class CreateNode(unittest.TestCase): @@ -250,8 +232,7 @@ class CreateNode(unittest.TestCase): ) -class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, - RunBinTahoeMixin): +class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): """ exercise "tahoe run" for both introducer and client node, by spawning "tahoe run" as a subprocess. This doesn't get us line-level coverage, but @@ -271,18 +252,18 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, The introducer furl is stable across restarts. """ basedir = self.workdir("test_introducer") - c1 = os.path.join(basedir, "c1") + c1 = os.path.join(basedir, u"c1") tahoe = CLINodeAPI(reactor, FilePath(c1)) self.addCleanup(tahoe.stop_and_wait) - out, err, rc_or_sig = yield self.run_bintahoe([ - "--quiet", - "create-introducer", - "--basedir", c1, - "--hostname", "127.0.0.1", + out, err, returncode = run_bintahoe([ + u"--quiet", + u"create-introducer", + u"--basedir", c1, + u"--hostname", u"127.0.0.1", ]) - self.assertEqual(rc_or_sig, 0) + self.assertEqual(returncode, 0) # This makes sure that node.url is written, which allows us to # detect when the introducer restarts in _node_has_restarted below. @@ -350,18 +331,18 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, 3) Verify that the pid file is removed after SIGTERM (on POSIX). """ basedir = self.workdir("test_client") - c1 = os.path.join(basedir, "c1") + c1 = os.path.join(basedir, u"c1") tahoe = CLINodeAPI(reactor, FilePath(c1)) # Set this up right now so we don't forget later. self.addCleanup(tahoe.cleanup) - out, err, rc_or_sig = yield self.run_bintahoe([ - "--quiet", "create-node", "--basedir", c1, - "--webport", "0", - "--hostname", "localhost", + out, err, returncode = run_bintahoe([ + u"--quiet", u"create-node", u"--basedir", c1, + u"--webport", u"0", + u"--hostname", u"localhost", ]) - self.failUnlessEqual(rc_or_sig, 0) + self.failUnlessEqual(returncode, 0) # Check that the --webport option worked. config = fileutil.read(tahoe.config_file.path) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 235361cf8..75219004b 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -51,6 +51,10 @@ from twisted.python.filepath import ( FilePath, ) +from ._twisted_9607 import ( + getProcessOutputAndValue, +) + from .common import ( TEST_RSA_KEY_SIZE, SameProcessStreamEndpointAssigner, @@ -61,13 +65,29 @@ from .web.common import ( ) # TODO: move this to common or common_util -from allmydata.test.test_runner import RunBinTahoeMixin from . import common_util as testutil from .common_util import run_cli_unicode from ..scripts.common import ( write_introducer, ) +class RunBinTahoeMixin(object): + def run_bintahoe(self, args, stdin=None, python_options=[], env=None): + command = sys.executable + argv = python_options + ["-m", "allmydata.scripts.runner"] + args + + if env is None: + env = os.environ + + d = getProcessOutputAndValue(command, argv, env, stdinBytes=stdin) + def fix_signal(result): + # Mirror subprocess.Popen.returncode structure + (out, err, signal) = result + return (out, err, -signal) + d.addErrback(fix_signal) + return d + + def run_cli(*args, **kwargs): """ Run a Tahoe-LAFS CLI utility, but inline. From c6d108ddb25b367b041d97a6110a0666d9307062 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:07:37 -0500 Subject: [PATCH 33/93] Make test_runner and test_windows both use the good Popen --- src/allmydata/test/common.py | 22 ++++++++++++++++++++++ src/allmydata/test/test_runner.py | 13 ++++++++----- src/allmydata/test/test_windows.py | 14 ++------------ 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index f1dbf651d..a49a79ec0 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -9,6 +9,10 @@ __all__ = [ "flush_logged_errors", "skip", "skipIf", + + # Selected based on platform and re-exported for convenience. + "Popen", + "PIPE", ] from past.builtins import chr as byteschr, unicode @@ -48,6 +52,9 @@ from testtools.twistedsupport import ( flush_logged_errors, ) +from twisted.python.runtime import ( + platform, +) from twisted.application import service from twisted.plugin import IPlugin from twisted.internet import defer @@ -101,6 +108,21 @@ from .eliotutil import ( ) from .common_util import ShouldFailMixin # noqa: F401 +if platform.isWindows(): + # Python 2.7 doesn't have good options for launching a process with + # non-ASCII in its command line. So use this alternative that does a + # better job. However, only use it on Windows because it doesn't work + # anywhere else. + from ._win_subprocess import ( + PIPE, + Popen, + ) +else: + from subprocess import ( + PIPE, + Popen, + ) + TEST_RSA_KEY_SIZE = 522 diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 64afca939..c98d4e376 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -5,10 +5,6 @@ from __future__ import ( import os.path, re, sys from os import linesep -from subprocess import ( - PIPE, - Popen, -) from eliot import ( log_call, @@ -29,7 +25,14 @@ from twisted.python.runtime import ( from allmydata.util import fileutil, pollmixin from allmydata.test import common_util import allmydata -from .common_util import parse_cli, run_cli +from .common import ( + PIPE, + Popen, +) +from .common_util import ( + parse_cli, + run_cli, +) from .cli_node_api import ( CLINodeAPI, Expect, diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index f2c1318c5..01e4a57c1 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -29,11 +29,6 @@ from json import ( from textwrap import ( dedent, ) -from subprocess import ( - PIPE, - Popen, -) - from twisted.python.filepath import ( FilePath, ) @@ -66,6 +61,8 @@ from hypothesis.strategies import ( ) from .common import ( + PIPE, + Popen, SyncTestCase, ) @@ -132,13 +129,6 @@ class GetArgvTests(SyncTestCase): ``get_argv`` returns a list representing the result of tokenizing the "command line" argument string provided to Windows processes. """ - # Python 2.7 doesn't have good options for launching a process with - # non-ASCII in its command line. So use this alternative that does a - # better job. Bury the import here because it only works on Windows. - from ._win_subprocess import ( - Popen - ) - working_path = FilePath(self.mktemp()) working_path.makedirs() save_argv_path = working_path.child("script.py") From 834abfe6bf8736d5c96477aca7028a297e689717 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:09:25 -0500 Subject: [PATCH 34/93] _win_subprocess didn't actually export this --- src/allmydata/test/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index a49a79ec0..2b1adebd9 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -114,15 +114,15 @@ if platform.isWindows(): # better job. However, only use it on Windows because it doesn't work # anywhere else. from ._win_subprocess import ( - PIPE, Popen, ) else: from subprocess import ( - PIPE, Popen, ) - +from subprocess import ( + PIPE, +) TEST_RSA_KEY_SIZE = 522 From 5df86b46084355cdc92d875e527f9ca08281d0a1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:26:12 -0500 Subject: [PATCH 35/93] restore test_with_python_options now that I see what it's testing --- src/allmydata/test/test_runner.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index c98d4e376..ad03bd391 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -61,7 +61,7 @@ rootdir = get_root_from_file(srcfile) @log_call(action_type="run-bin-tahoe") -def run_bintahoe(extra_argv): +def run_bintahoe(extra_argv, python_options=None): """ Run the main Tahoe entrypoint in a child process with the given additional arguments. @@ -71,7 +71,11 @@ def run_bintahoe(extra_argv): :return: A three-tuple of stdout (unicode), stderr (unicode), and the child process "returncode" (int). """ - argv = [sys.executable, u"-m", u"allmydata.scripts.runner"] + extra_argv + argv = [sys.executable] + if python_options is not None: + argv.extend(python_options) + argv.extend([u"-m", u"allmydata.scripts.runner"]) + argv.extend(extra_argv) p = Popen(argv, stdout=PIPE, stderr=PIPE) out = p.stdout.read().decode("utf-8") err = p.stderr.read().decode("utf-8") @@ -89,6 +93,21 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): self.assertEqual(returncode, 1) self.assertIn(u"Unknown command: " + tricky, out) + def test_with_python_options(self): + """ + Additional options for the Python interpreter don't prevent the runner + script from receiving the arguments meant for it. + """ + # This seems like a redundant test for someone else's functionality + # but on Windows we parse the whole command line string ourselves so + # we have to have our own implementation of skipping these options. + + # -t is a harmless option that warns about tabs so we can add it + # -without impacting other behavior noticably. + out, err, returncode = run_bintahoe(["--version"], python_options=["-t"]) + self.assertEqual(returncode, 0) + self.assertTrue(out.startswith(allmydata.__appname__ + '/')) + def test_help_eliot_destinations(self): out, err, returncode = run_bintahoe([u"--help-eliot-destinations"]) self.assertIn(u"\tfile:", out) From b81d57779a1b9a3a09c6de44e9d3e68d56eaed63 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:29:12 -0500 Subject: [PATCH 36/93] Tahoe's .pyscript is ancient history --- src/allmydata/windows/fixups.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index c71b85681..237f96ca5 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -185,8 +185,6 @@ def initialize(): # as in the case of a frozen executable created by bb-freeze or similar. sys.argv = argv[-len(sys.argv):] - if sys.argv[0].endswith('.pyscript'): - sys.argv[0] = sys.argv[0][:-9] def a_console(handle): From 1639aef197db54a3522293f16ed0acc34e0e955f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:29:32 -0500 Subject: [PATCH 37/93] Get rid of the argv unmangling that we no longer do --- src/allmydata/windows/fixups.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 237f96ca5..d34404aed 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -161,29 +161,13 @@ def initialize(): except Exception as e: _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,)) - # This works around . - - # Because of (and similar limitations in - # twisted), the 'bin/tahoe' script cannot invoke us with the actual Unicode arguments. - # Instead it "mangles" or escapes them using \x7F as an escape character, which we - # unescape here. - def unmangle(s): - return re.sub(u'\\x7F[0-9a-fA-F]*\\;', lambda m: unichr(int(m.group(0)[1:-1], 16)), s) - - argv_unicode = get_argv() - try: - argv = [unmangle(argv_u).encode('utf-8') for argv_u in argv_unicode] - except Exception as e: - _complain("%s: could not unmangle Unicode arguments.\n%r" - % (sys.argv[0], argv_unicode)) - raise + argv = list(arg.encode("utf-8") for arg in get_argv()) # Take only the suffix with the same number of arguments as sys.argv. # This accounts for anything that can cause initial arguments to be stripped, # for example, the Python interpreter or any options passed to it, or runner # scripts such as 'coverage run'. It works even if there are no such arguments, # as in the case of a frozen executable created by bb-freeze or similar. - sys.argv = argv[-len(sys.argv):] From 2306819db1829da1eb839fa38da8b2dd4cf4970a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:45:39 -0500 Subject: [PATCH 38/93] Get rid of unicode_to_argv and argv_to_unicode --- src/allmydata/scripts/cli.py | 42 ++++++++++++------------- src/allmydata/scripts/create_node.py | 6 ++-- src/allmydata/test/cli/common.py | 5 +-- src/allmydata/test/cli/test_backup.py | 5 +-- src/allmydata/test/cli/test_put.py | 7 +++-- src/allmydata/test/common_util.py | 2 +- src/allmydata/test/test_encodingutil.py | 35 ++------------------- src/allmydata/test/test_system.py | 4 +-- src/allmydata/util/encodingutil.py | 38 +--------------------- 9 files changed, 42 insertions(+), 102 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 379e1d212..bad96a252 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -4,7 +4,7 @@ import os.path, re, fnmatch from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ DEFAULT_ALIAS, BaseOptions -from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_local_unicode_path +from allmydata.util.encodingutil import argv_to_abspath, quote_local_unicode_path from .tahoe_status import TahoeStatusCommand NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") @@ -55,7 +55,7 @@ class MakeDirectoryOptions(FileStoreOptions): ] def parseArgs(self, where=""): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") if self['format']: if self['format'].upper() not in ("SDMF", "MDMF"): @@ -66,7 +66,7 @@ class MakeDirectoryOptions(FileStoreOptions): class AddAliasOptions(FileStoreOptions): def parseArgs(self, alias, cap): - self.alias = argv_to_unicode(alias) + self.alias = unicode(alias, "utf-8") if self.alias.endswith(u':'): self.alias = self.alias[:-1] self.cap = cap @@ -76,7 +76,7 @@ class AddAliasOptions(FileStoreOptions): class CreateAliasOptions(FileStoreOptions): def parseArgs(self, alias): - self.alias = argv_to_unicode(alias) + self.alias = unicode(alias, "utf-8") if self.alias.endswith(u':'): self.alias = self.alias[:-1] @@ -100,7 +100,7 @@ class ListOptions(FileStoreOptions): ("json", None, "Show the raw JSON output."), ] def parseArgs(self, where=""): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [PATH]" @@ -142,7 +142,7 @@ class GetOptions(FileStoreOptions): if arg2 == "-": arg2 = None - self.from_file = argv_to_unicode(arg1) + self.from_file = unicode(arg1, "utf-8") self.to_file = None if arg2 is None else argv_to_abspath(arg2) synopsis = "[options] REMOTE_FILE LOCAL_FILE" @@ -175,7 +175,7 @@ class PutOptions(FileStoreOptions): arg1 = None self.from_file = None if arg1 is None else argv_to_abspath(arg1) - self.to_file = None if arg2 is None else argv_to_unicode(arg2) + self.to_file = None if arg2 is None else unicode(arg2, "utf-8") if self['format']: if self['format'].upper() not in ("SDMF", "MDMF", "CHK"): @@ -218,8 +218,8 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = map(argv_to_unicode, args[:-1]) - self.destination = argv_to_unicode(args[-1]) + self.sources = list(unicode(a, "utf-8") for a in args[:-1]) + self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. TO" @@ -255,15 +255,15 @@ class CpOptions(FileStoreOptions): class UnlinkOptions(FileStoreOptions): def parseArgs(self, where): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] REMOTE_FILE" description = "Remove a named file from its parent directory." class MvOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = argv_to_unicode(frompath) - self.to_file = argv_to_unicode(topath) + self.from_file = unicode(frompath, "utf-8") + self.to_file = unicode(topath, "utf-8") synopsis = "[options] FROM TO" @@ -281,8 +281,8 @@ class MvOptions(FileStoreOptions): class LnOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = argv_to_unicode(frompath) - self.to_file = argv_to_unicode(topath) + self.from_file = unicode(frompath, "utf-8") + self.to_file = unicode(topath, "utf-8") synopsis = "[options] FROM_LINK TO_LINK" @@ -328,14 +328,14 @@ class BackupOptions(FileStoreOptions): def parseArgs(self, localdir, topath): self.from_dir = argv_to_abspath(localdir) - self.to_dir = argv_to_unicode(topath) + self.to_dir = unicode(topath, "utf-8") synopsis = "[options] FROM ALIAS:TO" def opt_exclude(self, pattern): """Ignore files matching a glob pattern. You may give multiple '--exclude' options.""" - g = argv_to_unicode(pattern).strip() + g = unicode(pattern, "utf-8").strip() if g: exclude = self['exclude'] exclude.add(g) @@ -385,7 +385,7 @@ class WebopenOptions(FileStoreOptions): ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" @@ -402,7 +402,7 @@ class ManifestOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed."), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" description = """ @@ -414,7 +414,7 @@ class StatsOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" description = """ @@ -429,7 +429,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = map(argv_to_unicode, locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = map(argv_to_unicode, locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index ac17cf445..ed4f0c71d 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -16,7 +16,7 @@ from allmydata.scripts.common import ( ) from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.assertutil import precondition -from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_local_unicode_path, get_io_encoding +from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path, get_io_encoding from allmydata.util import fileutil, i2p_provider, iputil, tor_provider from wormhole import wormhole @@ -238,7 +238,7 @@ def write_node_config(c, config): c.write("\n") c.write("[node]\n") - nickname = argv_to_unicode(config.get("nickname") or "") + nickname = unicode(config.get("nickname") or "", "utf-8") c.write("nickname = %s\n" % (nickname.encode('utf-8'),)) if config["hide-ip"]: c.write("reveal-IP-address = false\n") @@ -246,7 +246,7 @@ def write_node_config(c, config): c.write("reveal-IP-address = true\n") # TODO: validate webport - webport = argv_to_unicode(config.get("webport") or "none") + webport = unicode(config.get("webport") or "none", "utf-8") if webport.lower() == "none": webport = "" c.write("web.port = %s\n" % (webport.encode('utf-8'),)) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index bf175de44..13445ef0a 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,4 +1,5 @@ -from ...util.encodingutil import unicode_to_argv +from six import ensure_str + from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -45,6 +46,6 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) + client_dir = ensure_str(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_backup.py b/src/allmydata/test/cli/test_backup.py index ceecbd662..6aecd0af6 100644 --- a/src/allmydata/test/cli/test_backup.py +++ b/src/allmydata/test/cli/test_backup.py @@ -1,4 +1,5 @@ import os.path +from six import ensure_str from six.moves import cStringIO as StringIO from datetime import timedelta import re @@ -9,7 +10,7 @@ from twisted.python.monkey import MonkeyPatcher import __builtin__ from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.namespace import Namespace from allmydata.scripts import cli, backupdb from ..common_util import StallMixin @@ -413,7 +414,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): return StringIO() patcher = MonkeyPatcher((__builtin__, 'file', call_file)) - patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to']) + patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', ensure_str(exclude_file), 'from', 'to']) self.failUnless(ns.called) def test_ignore_symlinks(self): diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 08a66f98d..2deafb784 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,4 +1,7 @@ import os.path + +from six import ensure_str + from twisted.trial import unittest from twisted.python import usage @@ -7,7 +10,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin @@ -47,7 +50,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid(oneshare=True) rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) + abs_fn = ensure_str(abspath_expanduser_unicode(unicode(rel_fn))) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") d = self.do_cli("put", rel_fn) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 2a70cff3a..7b3194d3f 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -76,7 +76,7 @@ def run_cli_native(verb, *args, **kwargs): encoding = kwargs.pop("encoding", None) precondition( all(isinstance(arg, native_str) for arg in [verb] + nodeargs + list(args)), - "arguments to run_cli must be a native string -- convert using unicode_to_argv", + "arguments to run_cli must be a native string -- convert using UTF-8", verb=verb, args=args, nodeargs=nodeargs, diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index cbc9143b7..5f6700cd6 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -81,12 +81,12 @@ from allmydata.test.common_util import ( ReallyEqualMixin, skip_if_cannot_represent_filename, ) from allmydata.util import encodingutil, fileutil -from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ +from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ - to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ - unicode_to_argv + to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from + from twisted.python import usage @@ -138,12 +138,6 @@ class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): _reload() self.assertEqual(get_io_encoding(), 'utf-8') - def test_argv_to_unicode(self): - encodingutil.io_encoding = 'utf-8' - self.failUnlessRaises(usage.UsageError, - argv_to_unicode, - lumiere_nfc.encode('latin1')) - @skipIf(PY3, "Python 2 only.") def test_unicode_to_output(self): encodingutil.io_encoding = 'koi8-r' @@ -213,19 +207,6 @@ class EncodingUtil(ReallyEqualMixin): sys.platform = self.original_platform _reload() - def test_argv_to_unicode(self): - if 'argv' not in dir(self): - return - - mock_stdout = MockStdout() - mock_stdout.encoding = self.io_encoding - self.patch(sys, 'stdout', mock_stdout) - - argu = lumiere_nfc - argv = self.argv - _reload() - self.failUnlessReallyEqual(argv_to_unicode(argv), argu) - def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") @@ -245,16 +226,6 @@ class EncodingUtil(ReallyEqualMixin): def test_unicode_to_output_py3(self): self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) - @skipIf(PY3, "Python 2 only.") - def test_unicode_to_argv_py2(self): - """unicode_to_argv() converts to bytes on Python 2.""" - self.assertEqual(unicode_to_argv("abc"), u"abc".encode(self.io_encoding)) - - @skipIf(PY2, "Python 3 only.") - def test_unicode_to_argv_py3(self): - """unicode_to_argv() is noop on Python 3.""" - self.assertEqual(unicode_to_argv("abc"), "abc") - @skipIf(PY3, "Python 3 only.") def test_unicode_platform_py2(self): matrix = { diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 75219004b..03b9ba2de 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -35,7 +35,7 @@ from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.util import idlib, mathutil, pollmixin, fileutil from allmydata.util import log, base32 -from allmydata.util.encodingutil import quote_output, unicode_to_argv +from allmydata.util.encodingutil import quote_output from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data from allmydata.interfaces import IDirectoryNode, IFileNode, \ @@ -2185,7 +2185,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): log.msg("test_system.SystemTest._test_runner using %r" % filename) rc,output,err = yield run_cli("debug", "dump-share", "--offsets", - unicode_to_argv(filename)) + ensure_str(filename)) self.failUnlessEqual(rc, 0) # we only upload a single file, so we can assert some things about diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index f13dc5b8e..5cc3b8d19 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -107,53 +107,17 @@ def get_io_encoding(): """ return io_encoding -def argv_to_unicode(s): - """ - Decode given argv element to unicode. If this fails, raise a UsageError. - """ - if isinstance(s, unicode): - return s - - precondition(isinstance(s, bytes), s) - - try: - return unicode(s, io_encoding) - except UnicodeDecodeError: - raise usage.UsageError("Argument %s cannot be decoded as %s." % - (quote_output(s), io_encoding)) - def argv_to_abspath(s, **kwargs): """ Convenience function to decode an argv element to an absolute path, with ~ expanded. If this fails, raise a UsageError. """ - decoded = argv_to_unicode(s) + decoded = unicode(s, "utf-8") if decoded.startswith(u'-'): raise usage.UsageError("Path argument %s cannot start with '-'.\nUse %s if you intended to refer to a file." % (quote_output(s), quote_output(os.path.join('.', s)))) return abspath_expanduser_unicode(decoded, **kwargs) -def unicode_to_argv(s, mangle=False): - """ - Encode the given Unicode argument as a bytestring. - If the argument is to be passed to a different process, then the 'mangle' argument - should be true; on Windows, this uses a mangled encoding that will be reversed by - code in runner.py. - - On Python 3, just return the string unchanged, since argv is unicode. - """ - precondition(isinstance(s, unicode), s) - if PY3: - warnings.warn("This will be unnecessary once Python 2 is dropped.", - DeprecationWarning) - return s - - if mangle and sys.platform == "win32": - # This must be the same as 'mangle' in bin/tahoe-script.template. - return bytes(re.sub(u'[^\\x20-\\x7F]', lambda m: u'\x7F%x;' % (ord(m.group(0)),), s), io_encoding) - else: - return s.encode(io_encoding) - def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes. From 260706d33015a3df608db94d9fbd0de4cea481cc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 16:00:42 -0500 Subject: [PATCH 39/93] Fix the collision with the builtin list --- src/allmydata/scripts/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index bad96a252..310cb20fc 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,4 +1,5 @@ from __future__ import print_function +import __builtin__ import os.path, re, fnmatch from twisted.python import usage @@ -218,7 +219,7 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = list(unicode(a, "utf-8") for a in args[:-1]) + self.sources = __builtin__.list(unicode(a, "utf-8") for a in args[:-1]) self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. TO" @@ -429,7 +430,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +447,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ From b8abec607335f3b4242f44ea7bf832cc7df8bef6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 16:00:48 -0500 Subject: [PATCH 40/93] Get rid of the Latin-1 case Here's a supposition: UTF-8 or bust --- src/allmydata/test/cli/test_alias.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 72b634608..07f42b29d 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -99,22 +99,6 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): ) - def test_list_latin_1(self): - """ - An alias composed of all Latin-1-encodeable code points can be created - when the active encoding is Latin-1. - - This is very similar to ``test_list_utf_8`` but the assumption of - UTF-8 is nearly ubiquitous and explicitly exercising the codepaths - with a UTF-8-incompatible encoding helps flush out unintentional UTF-8 - assumptions. - """ - return self._check_create_alias( - u"taho\N{LATIN SMALL LETTER E WITH ACUTE}", - encoding="latin-1", - ) - - def test_list_utf_8(self): """ An alias composed of all UTF-8-encodeable code points can be created when From ec6c036f87fb74937cc4a246fcb4916575809ba6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 16:14:34 -0500 Subject: [PATCH 41/93] less cheesy list collision fix --- src/allmydata/scripts/cli.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 310cb20fc..c00917022 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,5 +1,4 @@ from __future__ import print_function -import __builtin__ import os.path, re, fnmatch from twisted.python import usage @@ -219,7 +218,7 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = __builtin__.list(unicode(a, "utf-8") for a in args[:-1]) + self.sources = list(unicode(a, "utf-8") for a in args[:-1]) self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. TO" @@ -430,7 +429,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -447,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -496,7 +495,7 @@ def list_aliases(options): rc = tahoe_add_alias.list_aliases(options) return rc -def list(options): +def list_(options): from allmydata.scripts import tahoe_ls rc = tahoe_ls.list(options) return rc @@ -582,7 +581,7 @@ dispatch = { "add-alias": add_alias, "create-alias": create_alias, "list-aliases": list_aliases, - "ls": list, + "ls": list_, "get": get, "put": put, "cp": cp, From de9bcc7ea85e0e0ab7da11aabd4b2d6a4ecdf07e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:21:20 -0500 Subject: [PATCH 42/93] encode Popen argv as UTF-8 on POSIX so we ignore locale --- src/allmydata/test/test_runner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index ad03bd391..4054dc289 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -76,6 +76,11 @@ def run_bintahoe(extra_argv, python_options=None): argv.extend(python_options) argv.extend([u"-m", u"allmydata.scripts.runner"]) argv.extend(extra_argv) + if not platform.isWindows(): + # On POSIX Popen (via execvp) will encode argv using the "filesystem" + # encoding. Depending on LANG this may make our unicode arguments + # unencodable. Do our own UTF-8 encoding here instead. + argv = list(arg.encode("utf-8") for arg in argv) p = Popen(argv, stdout=PIPE, stderr=PIPE) out = p.stdout.read().decode("utf-8") err = p.stderr.read().decode("utf-8") From 3d02545006ed08187be9cb9ad6902d1b6b543aa9 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:29:15 -0500 Subject: [PATCH 43/93] Remove tests based on locale behavior We don't like locale behavior --- src/allmydata/test/test_encodingutil.py | 65 +------------------------ 1 file changed, 1 insertion(+), 64 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 5f6700cd6..d49abafb3 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -70,7 +70,7 @@ if __name__ == "__main__": sys.exit(0) -import os, sys, locale +import os, sys from unittest import skipIf from twisted.trial import unittest @@ -93,69 +93,6 @@ from twisted.python import usage class MockStdout(object): pass -class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): - def test_get_io_encoding(self): - mock_stdout = MockStdout() - self.patch(sys, 'stdout', mock_stdout) - - mock_stdout.encoding = 'UTF-8' - _reload() - self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') - - mock_stdout.encoding = 'cp65001' - _reload() - self.assertEqual(get_io_encoding(), 'utf-8') - - mock_stdout.encoding = 'koi8-r' - expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' - _reload() - self.failUnlessReallyEqual(get_io_encoding(), expected) - - mock_stdout.encoding = 'nonexistent_encoding' - if sys.platform == "win32": - _reload() - self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') - else: - self.failUnlessRaises(AssertionError, _reload) - - def test_get_io_encoding_not_from_stdout(self): - preferredencoding = 'koi8-r' - def call_locale_getpreferredencoding(): - return preferredencoding - self.patch(locale, 'getpreferredencoding', call_locale_getpreferredencoding) - mock_stdout = MockStdout() - self.patch(sys, 'stdout', mock_stdout) - - expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' - _reload() - self.failUnlessReallyEqual(get_io_encoding(), expected) - - mock_stdout.encoding = None - _reload() - self.failUnlessReallyEqual(get_io_encoding(), expected) - - preferredencoding = None - _reload() - self.assertEqual(get_io_encoding(), 'utf-8') - - @skipIf(PY3, "Python 2 only.") - def test_unicode_to_output(self): - encodingutil.io_encoding = 'koi8-r' - self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) - - def test_no_unicode_normalization(self): - # Pretend to run on a Unicode platform. - # listdir_unicode normalized to NFC in 1.7beta, but now doesn't. - - def call_os_listdir(path): - return [Artonwall_nfd] - self.patch(os, 'listdir', call_os_listdir) - self.patch(sys, 'platform', 'darwin') - - _reload() - self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd]) - - # The following tests apply only to platforms that don't store filenames as # Unicode entities on the filesystem. class EncodingUtilNonUnicodePlatform(unittest.TestCase): From 23c34004a74ed9e95e5d25f04d3410286e5a1cac Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:29:49 -0500 Subject: [PATCH 44/93] Get rid of tests for bad io_encoding values We don't like bad io_encoding values --- src/allmydata/test/test_encodingutil.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index d49abafb3..a3c92d41c 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -84,11 +84,9 @@ from allmydata.util import encodingutil, fileutil from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ - get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ + get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from -from twisted.python import usage - class MockStdout(object): pass @@ -371,13 +369,6 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): check(u"\n", u"\"\\x0a\"", quote_newlines=True) def test_quote_output_default(self): - self.patch(encodingutil, 'io_encoding', 'ascii') - self.test_quote_output_ascii(None) - - self.patch(encodingutil, 'io_encoding', 'latin1') - self.test_quote_output_latin1(None) - - self.patch(encodingutil, 'io_encoding', 'utf-8') self.test_quote_output_utf8(None) From 60a44b99e69e33395daf48c46dfb0d4dc1ea3981 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:30:15 -0500 Subject: [PATCH 45/93] improve fixtures --- src/allmydata/test/test_encodingutil.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index a3c92d41c..d9d6cfeed 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -97,12 +97,8 @@ class EncodingUtilNonUnicodePlatform(unittest.TestCase): @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") def setUp(self): # Mock sys.platform because unicode_platform() uses it - self.original_platform = sys.platform - sys.platform = 'linux' - - def tearDown(self): - sys.platform = self.original_platform - _reload() + self.patch(sys, "platform", "linux") + self.addCleanup(_reload) def test_listdir_unicode(self): # What happens if latin1-encoded filenames are encountered on an UTF-8 @@ -135,12 +131,8 @@ class EncodingUtilNonUnicodePlatform(unittest.TestCase): class EncodingUtil(ReallyEqualMixin): def setUp(self): - self.original_platform = sys.platform - sys.platform = self.platform - - def tearDown(self): - sys.platform = self.original_platform - _reload() + self.patch(sys, "platform", self.platform) + self.addCleanup(_reload) def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") From 70d2fd66729789e548903618c566aebca82f6105 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:31:22 -0500 Subject: [PATCH 46/93] Don't have a Latin-1 io_encoding It's bad --- src/allmydata/test/test_encodingutil.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index d9d6cfeed..992ebd690 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -472,14 +472,6 @@ class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase): io_encoding = 'UTF-8' dirlist = [b'test_file', b'\xc3\x84rtonwall.mp3', b'Blah blah.txt'] -class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase): - uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' - argv = b'lumi\xe8re' - platform = 'linux2' - filesystem_encoding = 'ISO-8859-1' - io_encoding = 'ISO-8859-1' - dirlist = [b'test_file', b'Blah blah.txt', b'\xc4rtonwall.mp3'] - class Windows(EncodingUtil, unittest.TestCase): uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD' argv = b'lumi\xc3\xa8re' From 1810f4e99b5d06869bef8d050d4614f93ed4a2f4 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:31:41 -0500 Subject: [PATCH 47/93] Force the encoding to utf-8 more often --- src/allmydata/util/encodingutil.py | 38 +++++++----------------------- 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 5cc3b8d19..289874213 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -44,41 +44,19 @@ def canonical_encoding(encoding): return encoding -def check_encoding(encoding): - # sometimes Python returns an encoding name that it doesn't support for conversion - # fail early if this happens - try: - u"test".encode(encoding) - except (LookupError, AttributeError): - raise AssertionError("The character encoding '%s' is not supported for conversion." % (encoding,)) -filesystem_encoding = None -io_encoding = None +# On Windows we install UTF-8 stream wrappers for sys.stdout and +# sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). +# +# On POSIX, we are moving towards a UTF-8-everything and ignore the locale. +io_encoding = "utf-8" + is_unicode_platform = False use_unicode_filepath = False +filesystem_encoding = "mbcs" if sys.platform == "win32" else "utf-8" def _reload(): - global filesystem_encoding, io_encoding, is_unicode_platform, use_unicode_filepath - - filesystem_encoding = canonical_encoding(sys.getfilesystemencoding()) - check_encoding(filesystem_encoding) - - if sys.platform == 'win32': - # On Windows we install UTF-8 stream wrappers for sys.stdout and - # sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). - io_encoding = 'utf-8' - else: - ioenc = None - if hasattr(sys.stdout, 'encoding'): - ioenc = sys.stdout.encoding - if ioenc is None: - try: - ioenc = locale.getpreferredencoding() - except Exception: - pass # work around - io_encoding = canonical_encoding(ioenc) - - check_encoding(io_encoding) + global is_unicode_platform, use_unicode_filepath, filesystem_encoding is_unicode_platform = PY3 or sys.platform in ["win32", "darwin"] From 15c46924ce8200ed88f45cb20116417f794c60d5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:27:20 -0500 Subject: [PATCH 48/93] unused import --- src/allmydata/util/encodingutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 289874213..35bf26e0c 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -19,7 +19,7 @@ if PY2: from past.builtins import unicode -import sys, os, re, locale +import sys, os, re import unicodedata import warnings From 2889922a080771e0a1bb2dd28959929773df5eab Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:27:23 -0500 Subject: [PATCH 49/93] reign in scope - don't mess with filesystem encoding here It is a separate can of works from argv --- src/allmydata/util/encodingutil.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 35bf26e0c..32049b57f 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -32,6 +32,16 @@ from allmydata.util.fileutil import abspath_expanduser_unicode NoneType = type(None) +def check_encoding(encoding): + # sometimes Python returns an encoding name that it doesn't support for conversion + # fail early if this happens + try: + u"test".encode(encoding) + except (LookupError, AttributeError): + raise AssertionError( + "The character encoding '%s' is not supported for conversion." % (encoding,), + ) + def canonical_encoding(encoding): if encoding is None: log.msg("Warning: falling back to UTF-8 encoding.", level=log.WEIRD) @@ -53,11 +63,12 @@ io_encoding = "utf-8" is_unicode_platform = False use_unicode_filepath = False -filesystem_encoding = "mbcs" if sys.platform == "win32" else "utf-8" +filesystem_encoding = None def _reload(): - global is_unicode_platform, use_unicode_filepath, filesystem_encoding + global filesystem_encoding, is_unicode_platform, use_unicode_filepath + filesystem_encoding = canonical_encoding(sys.getfilesystemencoding()) is_unicode_platform = PY3 or sys.platform in ["win32", "darwin"] # Despite the Unicode-mode FilePath support added to Twisted in From a9a60857b2c5ee2a811eda6562e6e3c31c0b727c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:28:50 -0500 Subject: [PATCH 50/93] attempt to reduce diff noise --- src/allmydata/util/encodingutil.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 32049b57f..168f40a58 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -32,16 +32,6 @@ from allmydata.util.fileutil import abspath_expanduser_unicode NoneType = type(None) -def check_encoding(encoding): - # sometimes Python returns an encoding name that it doesn't support for conversion - # fail early if this happens - try: - u"test".encode(encoding) - except (LookupError, AttributeError): - raise AssertionError( - "The character encoding '%s' is not supported for conversion." % (encoding,), - ) - def canonical_encoding(encoding): if encoding is None: log.msg("Warning: falling back to UTF-8 encoding.", level=log.WEIRD) @@ -54,6 +44,15 @@ def canonical_encoding(encoding): return encoding +def check_encoding(encoding): + # sometimes Python returns an encoding name that it doesn't support for conversion + # fail early if this happens + try: + u"test".encode(encoding) + except (LookupError, AttributeError): + raise AssertionError( + "The character encoding '%s' is not supported for conversion." % (encoding,), + ) # On Windows we install UTF-8 stream wrappers for sys.stdout and # sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). From 7c0d2e3cd5cd3cc780882b6c583050f2fbe49e4e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:29:24 -0500 Subject: [PATCH 51/93] another un-re-shuffling --- src/allmydata/util/encodingutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 168f40a58..1c884a88d 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -60,9 +60,9 @@ def check_encoding(encoding): # On POSIX, we are moving towards a UTF-8-everything and ignore the locale. io_encoding = "utf-8" +filesystem_encoding = None is_unicode_platform = False use_unicode_filepath = False -filesystem_encoding = None def _reload(): global filesystem_encoding, is_unicode_platform, use_unicode_filepath From ae1a0c591bd5d3b1d2f69604a16dd77c568d863b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:58:34 -0500 Subject: [PATCH 52/93] Prefer to fix unicode_to_argv/argv_to_unicode instead of callers --- src/allmydata/scripts/cli.py | 42 +++++++++++++-------------- src/allmydata/scripts/create_node.py | 6 ++-- src/allmydata/test/cli/common.py | 5 ++-- src/allmydata/test/cli/test_backup.py | 5 ++-- src/allmydata/test/cli/test_put.py | 6 ++-- src/allmydata/test/common_util.py | 2 +- src/allmydata/test/test_runner.py | 11 +++---- src/allmydata/test/test_system.py | 4 +-- src/allmydata/util/encodingutil.py | 27 +++++++++++++++++ 9 files changed, 64 insertions(+), 44 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index c00917022..eeae20fe1 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -4,7 +4,7 @@ import os.path, re, fnmatch from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ DEFAULT_ALIAS, BaseOptions -from allmydata.util.encodingutil import argv_to_abspath, quote_local_unicode_path +from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_local_unicode_path from .tahoe_status import TahoeStatusCommand NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") @@ -55,7 +55,7 @@ class MakeDirectoryOptions(FileStoreOptions): ] def parseArgs(self, where=""): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) if self['format']: if self['format'].upper() not in ("SDMF", "MDMF"): @@ -66,7 +66,7 @@ class MakeDirectoryOptions(FileStoreOptions): class AddAliasOptions(FileStoreOptions): def parseArgs(self, alias, cap): - self.alias = unicode(alias, "utf-8") + self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] self.cap = cap @@ -76,7 +76,7 @@ class AddAliasOptions(FileStoreOptions): class CreateAliasOptions(FileStoreOptions): def parseArgs(self, alias): - self.alias = unicode(alias, "utf-8") + self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] @@ -100,7 +100,7 @@ class ListOptions(FileStoreOptions): ("json", None, "Show the raw JSON output."), ] def parseArgs(self, where=""): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [PATH]" @@ -142,7 +142,7 @@ class GetOptions(FileStoreOptions): if arg2 == "-": arg2 = None - self.from_file = unicode(arg1, "utf-8") + self.from_file = argv_to_unicode(arg1) self.to_file = None if arg2 is None else argv_to_abspath(arg2) synopsis = "[options] REMOTE_FILE LOCAL_FILE" @@ -175,7 +175,7 @@ class PutOptions(FileStoreOptions): arg1 = None self.from_file = None if arg1 is None else argv_to_abspath(arg1) - self.to_file = None if arg2 is None else unicode(arg2, "utf-8") + self.to_file = None if arg2 is None else argv_to_unicode(arg2) if self['format']: if self['format'].upper() not in ("SDMF", "MDMF", "CHK"): @@ -218,8 +218,8 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = list(unicode(a, "utf-8") for a in args[:-1]) - self.destination = unicode(args[-1], "utf-8") + self.sources = map(argv_to_unicode, args[:-1]) + self.destination = argv_to_unicode(args[-1]) synopsis = "[options] FROM.. TO" @@ -255,15 +255,15 @@ class CpOptions(FileStoreOptions): class UnlinkOptions(FileStoreOptions): def parseArgs(self, where): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] REMOTE_FILE" description = "Remove a named file from its parent directory." class MvOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = unicode(frompath, "utf-8") - self.to_file = unicode(topath, "utf-8") + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) synopsis = "[options] FROM TO" @@ -281,8 +281,8 @@ class MvOptions(FileStoreOptions): class LnOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = unicode(frompath, "utf-8") - self.to_file = unicode(topath, "utf-8") + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) synopsis = "[options] FROM_LINK TO_LINK" @@ -328,14 +328,14 @@ class BackupOptions(FileStoreOptions): def parseArgs(self, localdir, topath): self.from_dir = argv_to_abspath(localdir) - self.to_dir = unicode(topath, "utf-8") + self.to_dir = argv_to_unicode(topath) synopsis = "[options] FROM ALIAS:TO" def opt_exclude(self, pattern): """Ignore files matching a glob pattern. You may give multiple '--exclude' options.""" - g = unicode(pattern, "utf-8").strip() + g = argv_to_unicode(pattern).strip() if g: exclude = self['exclude'] exclude.add(g) @@ -385,7 +385,7 @@ class WebopenOptions(FileStoreOptions): ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" @@ -402,7 +402,7 @@ class ManifestOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed."), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -414,7 +414,7 @@ class StatsOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -429,7 +429,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = map(argv_to_unicode, locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = map(argv_to_unicode, locations) synopsis = "[options] [ALIAS:PATH]" description = """ diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index ed4f0c71d..ac17cf445 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -16,7 +16,7 @@ from allmydata.scripts.common import ( ) from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.assertutil import precondition -from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path, get_io_encoding +from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_local_unicode_path, get_io_encoding from allmydata.util import fileutil, i2p_provider, iputil, tor_provider from wormhole import wormhole @@ -238,7 +238,7 @@ def write_node_config(c, config): c.write("\n") c.write("[node]\n") - nickname = unicode(config.get("nickname") or "", "utf-8") + nickname = argv_to_unicode(config.get("nickname") or "") c.write("nickname = %s\n" % (nickname.encode('utf-8'),)) if config["hide-ip"]: c.write("reveal-IP-address = false\n") @@ -246,7 +246,7 @@ def write_node_config(c, config): c.write("reveal-IP-address = true\n") # TODO: validate webport - webport = unicode(config.get("webport") or "none", "utf-8") + webport = argv_to_unicode(config.get("webport") or "none") if webport.lower() == "none": webport = "" c.write("web.port = %s\n" % (webport.encode('utf-8'),)) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index 13445ef0a..bf175de44 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,5 +1,4 @@ -from six import ensure_str - +from ...util.encodingutil import unicode_to_argv from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -46,6 +45,6 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = ensure_str(self.get_clientdir(i=client_num)) + client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_backup.py b/src/allmydata/test/cli/test_backup.py index 6aecd0af6..ceecbd662 100644 --- a/src/allmydata/test/cli/test_backup.py +++ b/src/allmydata/test/cli/test_backup.py @@ -1,5 +1,4 @@ import os.path -from six import ensure_str from six.moves import cStringIO as StringIO from datetime import timedelta import re @@ -10,7 +9,7 @@ from twisted.python.monkey import MonkeyPatcher import __builtin__ from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import get_io_encoding +from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv from allmydata.util.namespace import Namespace from allmydata.scripts import cli, backupdb from ..common_util import StallMixin @@ -414,7 +413,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): return StringIO() patcher = MonkeyPatcher((__builtin__, 'file', call_file)) - patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', ensure_str(exclude_file), 'from', 'to']) + patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to']) self.failUnless(ns.called) def test_ignore_symlinks(self): diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 2deafb784..31eb671bb 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,7 +1,5 @@ import os.path -from six import ensure_str - from twisted.trial import unittest from twisted.python import usage @@ -10,7 +8,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding +from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin @@ -50,7 +48,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid(oneshare=True) rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = ensure_str(abspath_expanduser_unicode(unicode(rel_fn))) + abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") d = self.do_cli("put", rel_fn) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 7b3194d3f..2a70cff3a 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -76,7 +76,7 @@ def run_cli_native(verb, *args, **kwargs): encoding = kwargs.pop("encoding", None) precondition( all(isinstance(arg, native_str) for arg in [verb] + nodeargs + list(args)), - "arguments to run_cli must be a native string -- convert using UTF-8", + "arguments to run_cli must be a native string -- convert using unicode_to_argv", verb=verb, args=args, nodeargs=nodeargs, diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 4054dc289..2f0ac0cbe 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -23,6 +23,7 @@ from twisted.python.runtime import ( platform, ) from allmydata.util import fileutil, pollmixin +from allmydata.util.encodingutil import unicode_to_argv, get_filesystem_encoding from allmydata.test import common_util import allmydata from .common import ( @@ -71,16 +72,12 @@ def run_bintahoe(extra_argv, python_options=None): :return: A three-tuple of stdout (unicode), stderr (unicode), and the child process "returncode" (int). """ - argv = [sys.executable] + argv = [sys.executable.decode(get_filesystem_encoding())] if python_options is not None: argv.extend(python_options) argv.extend([u"-m", u"allmydata.scripts.runner"]) argv.extend(extra_argv) - if not platform.isWindows(): - # On POSIX Popen (via execvp) will encode argv using the "filesystem" - # encoding. Depending on LANG this may make our unicode arguments - # unencodable. Do our own UTF-8 encoding here instead. - argv = list(arg.encode("utf-8") for arg in argv) + argv = list(unicode_to_argv(arg) for arg in argv) p = Popen(argv, stdout=PIPE, stderr=PIPE) out = p.stdout.read().decode("utf-8") err = p.stderr.read().decode("utf-8") @@ -109,7 +106,7 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): # -t is a harmless option that warns about tabs so we can add it # -without impacting other behavior noticably. - out, err, returncode = run_bintahoe(["--version"], python_options=["-t"]) + out, err, returncode = run_bintahoe([u"--version"], python_options=[u"-t"]) self.assertEqual(returncode, 0) self.assertTrue(out.startswith(allmydata.__appname__ + '/')) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 03b9ba2de..75219004b 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -35,7 +35,7 @@ from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.util import idlib, mathutil, pollmixin, fileutil from allmydata.util import log, base32 -from allmydata.util.encodingutil import quote_output +from allmydata.util.encodingutil import quote_output, unicode_to_argv from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data from allmydata.interfaces import IDirectoryNode, IFileNode, \ @@ -2185,7 +2185,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): log.msg("test_system.SystemTest._test_runner using %r" % filename) rc,output,err = yield run_cli("debug", "dump-share", "--offsets", - ensure_str(filename)) + unicode_to_argv(filename)) self.failUnlessEqual(rc, 0) # we only upload a single file, so we can assert some things about diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 1c884a88d..c5a8639e8 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -18,6 +18,7 @@ if PY2: from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 from past.builtins import unicode +from six import ensure_str import sys, os, re import unicodedata @@ -106,6 +107,32 @@ def argv_to_abspath(s, **kwargs): % (quote_output(s), quote_output(os.path.join('.', s)))) return abspath_expanduser_unicode(decoded, **kwargs) + +def unicode_to_argv(s, mangle=False): + """ + Make the given unicode string suitable for use in an argv list. + + On Python 2, this encodes using UTF-8. On Python 3, this returns the + input unmodified. + """ + precondition(isinstance(s, unicode), s) + return ensure_str(s) + + +def argv_to_unicode(s): + """ + Perform the inverse of ``unicode_to_argv``. + """ + if isinstance(s, unicode): + return s + precondition(isinstance(s, bytes), s) + + try: + return unicode(s, io_encoding) + except UnicodeDecodeError: + raise usage.UsageError("Argument %s cannot be decoded as %s." % + (quote_output(s), io_encoding)) + def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes. From 3dadd47416cc3338e7b17e035c7c8b9fcc179f8e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:11:20 -0500 Subject: [PATCH 53/93] unused import --- src/allmydata/windows/fixups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index d34404aed..b4204b5d3 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -1,6 +1,6 @@ from __future__ import print_function -import codecs, re +import codecs from functools import partial from ctypes import WINFUNCTYPE, windll, POINTER, c_int, WinError, byref, get_last_error From 8f498437cf22976f7033be7eed4731478b4baa7c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:11:23 -0500 Subject: [PATCH 54/93] whitespace --- src/allmydata/test/cli/test_put.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 31eb671bb..08a66f98d 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,5 +1,4 @@ import os.path - from twisted.trial import unittest from twisted.python import usage From db31d2bc1a85fa454aff57de8f390bed48826485 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:14:38 -0500 Subject: [PATCH 55/93] news fragment --- newsfragments/3588.incompat | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3588.incompat diff --git a/newsfragments/3588.incompat b/newsfragments/3588.incompat new file mode 100644 index 000000000..402ae8479 --- /dev/null +++ b/newsfragments/3588.incompat @@ -0,0 +1 @@ +The Tahoe command line now always uses UTF-8 to decode its arguments, regardless of locale. From 82d24bfaf7662c989816765e52d2b3fe962762dc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:46:04 -0500 Subject: [PATCH 56/93] one more --- src/allmydata/util/encodingutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index c5a8639e8..48d5cc7b4 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -101,7 +101,7 @@ def argv_to_abspath(s, **kwargs): Convenience function to decode an argv element to an absolute path, with ~ expanded. If this fails, raise a UsageError. """ - decoded = unicode(s, "utf-8") + decoded = argv_to_unicode(s) if decoded.startswith(u'-'): raise usage.UsageError("Path argument %s cannot start with '-'.\nUse %s if you intended to refer to a file." % (quote_output(s), quote_output(os.path.join('.', s)))) From aa4f1130270191ed3b8992e370ba684b5d5d5136 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:11:43 -0500 Subject: [PATCH 57/93] Get the monkey patching right --- src/allmydata/test/test_encodingutil.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 992ebd690..da8ee8618 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -96,10 +96,14 @@ class MockStdout(object): class EncodingUtilNonUnicodePlatform(unittest.TestCase): @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") def setUp(self): - # Mock sys.platform because unicode_platform() uses it - self.patch(sys, "platform", "linux") + # Make sure everything goes back to the way it was at the end of the + # test. self.addCleanup(_reload) + # Mock sys.platform because unicode_platform() uses it. Cleanups run + # in reverse order so we do this second so it gets undone first. + self.patch(sys, "platform", "linux") + def test_listdir_unicode(self): # What happens if latin1-encoded filenames are encountered on an UTF-8 # filesystem? @@ -131,8 +135,8 @@ class EncodingUtilNonUnicodePlatform(unittest.TestCase): class EncodingUtil(ReallyEqualMixin): def setUp(self): - self.patch(sys, "platform", self.platform) self.addCleanup(_reload) + self.patch(sys, "platform", self.platform) def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") From 46d3ffb2e287217e2afb7977bf3e41e6521ddab1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:20:50 -0500 Subject: [PATCH 58/93] diff shrink --- src/allmydata/util/encodingutil.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 48d5cc7b4..6d4cd3a8f 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -96,6 +96,20 @@ def get_io_encoding(): """ return io_encoding +def argv_to_unicode(s): + """ + Perform the inverse of ``unicode_to_argv``. + """ + if isinstance(s, unicode): + return s + precondition(isinstance(s, bytes), s) + + try: + return unicode(s, io_encoding) + except UnicodeDecodeError: + raise usage.UsageError("Argument %s cannot be decoded as %s." % + (quote_output(s), io_encoding)) + def argv_to_abspath(s, **kwargs): """ Convenience function to decode an argv element to an absolute path, with ~ expanded. @@ -119,20 +133,6 @@ def unicode_to_argv(s, mangle=False): return ensure_str(s) -def argv_to_unicode(s): - """ - Perform the inverse of ``unicode_to_argv``. - """ - if isinstance(s, unicode): - return s - precondition(isinstance(s, bytes), s) - - try: - return unicode(s, io_encoding) - except UnicodeDecodeError: - raise usage.UsageError("Argument %s cannot be decoded as %s." % - (quote_output(s), io_encoding)) - def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes. From 99f00818a8eecec76717d977d051c1b0bdac5cb6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:21:32 -0500 Subject: [PATCH 59/93] diff shrink --- src/allmydata/util/encodingutil.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 6d4cd3a8f..679ad2055 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -98,10 +98,13 @@ def get_io_encoding(): def argv_to_unicode(s): """ - Perform the inverse of ``unicode_to_argv``. + Decode given argv element to unicode. If this fails, raise a UsageError. + + This is the inverse of ``unicode_to_argv``. """ if isinstance(s, unicode): return s + precondition(isinstance(s, bytes), s) try: From 7ca3c86a3501b7ad7c702cd76c2d81d9cd9328c0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:35:03 -0500 Subject: [PATCH 60/93] debug nonsense --- src/allmydata/scripts/runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 1f993fda1..2a41d5cf5 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -119,7 +119,8 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=stdout) + for f in stdout, open("debug.txt", "wt"): + print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=f) sys.exit(1) return config From ec92f0362d178fd125daaeb35e386502c5712eb2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:36:42 -0500 Subject: [PATCH 61/93] this? --- src/allmydata/scripts/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 2a41d5cf5..5af4083cb 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -119,7 +119,7 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - for f in stdout, open("debug.txt", "wt"): + for f in stdout, open("debug.txt", "wb"): print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=f) sys.exit(1) return config From 183ee10035cf59e427bee91ced6a66f9fb2a276e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:39:56 -0500 Subject: [PATCH 62/93] probably more useful debug info --- src/allmydata/scripts/runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 5af4083cb..ee0811ea5 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -107,6 +107,8 @@ def parse_options(argv, config=None): return config def parse_or_exit_with_explanation(argv, stdout=sys.stdout): + with open("argv-debug.txt", "wt") as f: + print(repr(argv), file=f) config = Options() try: parse_options(argv, config=config) @@ -119,8 +121,7 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - for f in stdout, open("debug.txt", "wb"): - print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=f) + print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=stdout) sys.exit(1) return config From e3a805caa724f5f3c9c9948c95bd71f995712dff Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:44:00 -0500 Subject: [PATCH 63/93] unicode_to_argv == id on win32 --- src/allmydata/util/encodingutil.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 679ad2055..20fecf4a1 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -129,10 +129,12 @@ def unicode_to_argv(s, mangle=False): """ Make the given unicode string suitable for use in an argv list. - On Python 2, this encodes using UTF-8. On Python 3, this returns the - input unmodified. + On Python 2 on POSIX, this encodes using UTF-8. On Python 3 and on + Windows, this returns the input unmodified. """ precondition(isinstance(s, unicode), s) + if sys.platform == "win32": + return s return ensure_str(s) From 622d67c9b937ed9ea8605c0ddd6149b255067726 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:55:40 -0500 Subject: [PATCH 64/93] done with this, thanks --- src/allmydata/scripts/runner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index ee0811ea5..1f993fda1 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -107,8 +107,6 @@ def parse_options(argv, config=None): return config def parse_or_exit_with_explanation(argv, stdout=sys.stdout): - with open("argv-debug.txt", "wt") as f: - print(repr(argv), file=f) config = Options() try: parse_options(argv, config=config) From 522f96b150cb11d0f6ddf0c00c42744d262b01b6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:56:37 -0500 Subject: [PATCH 65/93] may as well leave(/restore) this --- src/allmydata/util/encodingutil.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 20fecf4a1..28458c9dc 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -133,6 +133,9 @@ def unicode_to_argv(s, mangle=False): Windows, this returns the input unmodified. """ precondition(isinstance(s, unicode), s) + if PY3: + warnings.warn("This will be unnecessary once Python 2 is dropped.", + DeprecationWarning) if sys.platform == "win32": return s return ensure_str(s) From 5a145e74ef59c02cf72c36efb90b180eb49c913c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:59:16 -0500 Subject: [PATCH 66/93] a mild warning/suggestion here --- src/allmydata/test/test_system.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 75219004b..bf115f127 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -73,6 +73,9 @@ from ..scripts.common import ( class RunBinTahoeMixin(object): def run_bintahoe(self, args, stdin=None, python_options=[], env=None): + # test_runner.run_bintahoe has better unicode support but doesn't + # support env yet and is also synchronous. If we could get rid of + # this in favor of that, though, it would probably be an improvement. command = sys.executable argv = python_options + ["-m", "allmydata.scripts.runner"] + args From 44d76cb159b4d75312c1bcd22cd0bfa9010e620d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:00:02 -0500 Subject: [PATCH 67/93] fix formatting mistake --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 2f0ac0cbe..cf56e8baa 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -105,7 +105,7 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): # we have to have our own implementation of skipping these options. # -t is a harmless option that warns about tabs so we can add it - # -without impacting other behavior noticably. + # without impacting other behavior noticably. out, err, returncode = run_bintahoe([u"--version"], python_options=[u"-t"]) self.assertEqual(returncode, 0) self.assertTrue(out.startswith(allmydata.__appname__ + '/')) From 9c63703efc23161bda63f863963dc4f5f75cdc64 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:15:42 -0500 Subject: [PATCH 68/93] no effort being made to support these locales --- src/allmydata/test/test_encodingutil.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index da8ee8618..06340496b 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -492,20 +492,6 @@ class MacOSXLeopard(EncodingUtil, unittest.TestCase): io_encoding = 'UTF-8' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] -class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase): - uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' - platform = 'darwin' - filesystem_encoding = 'utf-8' - io_encoding = 'US-ASCII' - dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] - -class OpenBSD(EncodingUtil, unittest.TestCase): - uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)' - platform = 'openbsd4' - filesystem_encoding = '646' - io_encoding = '646' - # Oops, I cannot write filenames containing non-ascii characters - class TestToFromStr(ReallyEqualMixin, unittest.TestCase): def test_to_bytes(self): From 6c430bd4e60a0e2f42a66ecf024edf0ae5b430de Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:16:18 -0500 Subject: [PATCH 69/93] re-add a direct unicode_to_argv test harder to express the conditional in skips so the two tests become one --- src/allmydata/test/test_encodingutil.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 06340496b..f7987d466 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -85,8 +85,8 @@ from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ - to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from - + to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ + unicode_to_argv class MockStdout(object): pass @@ -157,6 +157,20 @@ class EncodingUtil(ReallyEqualMixin): def test_unicode_to_output_py3(self): self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) + def test_unicode_to_argv(self): + """ + unicode_to_argv() returns its unicode argument on Windows and Python 2 and + converts to bytes using UTF-8 elsewhere. + """ + result = unicode_to_argv(lumiere_nfc) + if PY3 or self.platform == "win32": + expected_value = lumiere_nfc + else: + expected_value = lumiere_nfc.encode(self.io_encoding) + + self.assertIsInstance(result, type(expected_value)) + self.assertEqual(result, expected_value) + @skipIf(PY3, "Python 3 only.") def test_unicode_platform_py2(self): matrix = { From 6984f2be3ffc709aff663e69291d3ed998dd2599 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:58:07 -0500 Subject: [PATCH 70/93] Try to get the Python 2 / Windows case working --- src/allmydata/test/cli/common.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index bf175de44..f1c48d1af 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,4 +1,5 @@ -from ...util.encodingutil import unicode_to_argv +from six import ensure_str + from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -45,6 +46,12 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) + # If we were really going to launch a child process then + # `unicode_to_argv` would be the right thing to do here. However, + # we're just going to call some Python functions directly and those + # Python functions want native strings. So ignore the requirements + # for passing arguments to another process and make sure this argument + # is a native string. + client_dir = ensure_str(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) From 43dc85501f7ba57f38c33eb73a5346e5cf670e44 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 16:03:28 -0500 Subject: [PATCH 71/93] is this api less troublesome? --- src/allmydata/test/cli/test_put.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 08a66f98d..fadc09c06 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -46,21 +46,21 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.basedir = "cli/Put/unlinked_immutable_from_file" self.set_up_grid(oneshare=True) - rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) + rel_fn = unicode(os.path.join(self.basedir, "DATAFILE")) + abs_fn = abspath_expanduser_unicode(rel_fn) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") - d = self.do_cli("put", rel_fn) + d = self.do_cli_unicode(u"put", [rel_fn]) def _uploaded(args): (rc, out, err) = args readcap = out self.failUnless(readcap.startswith("URI:LIT:"), readcap) self.readcap = readcap d.addCallback(_uploaded) - d.addCallback(lambda res: self.do_cli("put", "./" + rel_fn)) + d.addCallback(lambda res: self.do_cli_unicode(u"put", [u"./" + rel_fn])) d.addCallback(lambda rc_stdout_stderr: self.failUnlessReallyEqual(rc_stdout_stderr[1], self.readcap)) - d.addCallback(lambda res: self.do_cli("put", abs_fn)) + d.addCallback(lambda res: self.do_cli_unicode(u"put", [abs_fn])) d.addCallback(lambda rc_stdout_stderr: self.failUnlessReallyEqual(rc_stdout_stderr[1], self.readcap)) # we just have to assume that ~ is handled properly From 216efb2aed019fe893b760dd4e40780da4a202e3 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 16:52:43 -0500 Subject: [PATCH 72/93] unused import --- src/allmydata/test/cli/test_put.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index fadc09c06..3392e67b4 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -7,7 +7,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin From 512897eca0310ce6ddfe262520f3c21bcf345ccf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 19 Jan 2021 13:46:32 -0500 Subject: [PATCH 73/93] news fragment --- newsfragments/3592.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3592.minor diff --git a/newsfragments/3592.minor b/newsfragments/3592.minor new file mode 100644 index 000000000..e69de29bb From 61d5f920bb9db703edb9f3aed73f2d0488ee0d2c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 19 Jan 2021 14:28:16 -0500 Subject: [PATCH 74/93] Add tests for the tag construction code and make it a bit safer Check for sane inputs, reject insane ones --- src/allmydata/test/test_hashutil.py | 36 +++++++++++++++++++++++++++++ src/allmydata/util/hashutil.py | 29 ++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_hashutil.py b/src/allmydata/test/test_hashutil.py index 6ec861c9f..6ac73ae60 100644 --- a/src/allmydata/test/test_hashutil.py +++ b/src/allmydata/test/test_hashutil.py @@ -126,6 +126,42 @@ class HashUtilTests(unittest.TestCase): base32.a2b(b"2ckv3dfzh6rgjis6ogfqhyxnzy"), ) + def test_convergence_hasher_tag(self): + """ + ``_convergence_hasher_tag`` constructs the convergence hasher tag from a + unique prefix, the required, total, and segment size parameters, and a + convergence secret. + """ + self.assertEqual( + "allmydata_immutable_content_to_key_with_added_secret_v1+" + "16:\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42," + "9:3,10,1024,", + hashutil._convergence_hasher_tag( + k=3, + n=10, + segsize=1024, + convergence=b"\x42" * 16, + ), + ) + + def test_convergence_hasher_out_of_bounds(self): + """ + ``_convergence_hasher_tag`` raises ``ValueError`` if k or n is not between + 1 and 256 inclusive or if k is greater than n. + """ + segsize = 1024 + secret = b"\x42" * 16 + for bad_k in (0, 2, 257): + with self.assertRaises(ValueError): + hashutil._convergence_hasher_tag( + k=bad_k, n=1, segsize=segsize, convergence=secret, + ) + for bad_n in (0, 1, 257): + with self.assertRaises(ValueError): + hashutil._convergence_hasher_tag( + k=2, n=bad_n, segsize=segsize, convergence=secret, + ) + def test_known_answers(self): """ Verify backwards compatibility by comparing hash outputs for some diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index ebb2f12af..f82c04efd 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -176,10 +176,37 @@ def convergence_hash(k, n, segsize, data, convergence): return h.digest() -def convergence_hasher(k, n, segsize, convergence): +def _convergence_hasher_tag(k, n, segsize, convergence): + """ + Create the convergence hashing tag. + + :param int k: Required shares. + :param int n: Total shares. + :param int segsize: Maximum segment size. + :param bytes convergence: The convergence secret. + + :return bytes: The bytestring to use as a tag in the convergence hash. + """ assert isinstance(convergence, bytes) + if k > n: + raise ValueError( + "k > n not allowed; k = {}, n = {}".format(k, n), + ) + if k < 1 or n < 1: + raise ValueError( + "k, n < 1 not allowed; k = {}, n = {}".format(k, n), + ) + if k > 256 or n > 256: + raise ValueError( + "k, n > 256 not allowed; k = {}, n = {}".format(k, n), + ) param_tag = netstring(b"%d,%d,%d" % (k, n, segsize)) tag = CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + param_tag + return tag + + +def convergence_hasher(k, n, segsize, convergence): + tag = _convergence_hasher_tag(k, n, segsize, convergence) return tagged_hasher(tag, KEYLEN) From 64f3e1277e720321da8e22d598acf1e013c1d0a1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 27 Jan 2021 11:33:09 -0500 Subject: [PATCH 75/93] Replace eliot_logged_test with something ... else The previous implementation relied on TestCase.addCleanup in an unreliable way. This implementation pushes the decoration logic in to the test method itself on the premise that test methods that do weird stuff are better supported than `run` methods that do weird stuff. Also add some more tests for this behavior. --- src/allmydata/test/eliotutil.py | 198 +++++++++++++-------------- src/allmydata/test/test_eliotutil.py | 68 +++++++++ 2 files changed, 166 insertions(+), 100 deletions(-) diff --git a/src/allmydata/test/eliotutil.py b/src/allmydata/test/eliotutil.py index 88b4ec90e..3579eb31f 100644 --- a/src/allmydata/test/eliotutil.py +++ b/src/allmydata/test/eliotutil.py @@ -6,17 +6,22 @@ Tools aimed at the interaction between tests and Eliot. # Can't use `builtins.str` because it's not JSON encodable: # `exceptions.TypeError: is not JSON-encodeable` from past.builtins import unicode as str -from future.utils import PY3 +from future.utils import PY2 +from six import ensure_text __all__ = [ "RUN_TEST", "EliotLoggedRunTest", - "eliot_logged_test", ] +try: + from typing import Callable +except ImportError: + pass + from functools import ( - wraps, partial, + wraps, ) import attr @@ -24,11 +29,11 @@ import attr from eliot import ( ActionType, Field, + MemoryLogger, ) -from eliot.testing import capture_logging - -from twisted.internet.defer import ( - maybeDeferred, +from eliot.testing import ( + swap_logger, + check_for_errors, ) from ..util.jsonbytes import BytesJSONEncoder @@ -48,92 +53,12 @@ RUN_TEST = ActionType( ) -def eliot_logged_test(f): - """ - Decorate a test method to run in a dedicated Eliot action context. - - The action will finish after the test is done (after the returned Deferred - fires, if a Deferred is returned). It will note the name of the test - being run. - - All messages emitted by the test will be validated. They will still be - delivered to the global logger. - """ - # A convenient, mutable container into which nested functions can write - # state to be shared among them. - class storage(object): - pass - - - # On Python 3, we want to use our custom JSON encoder when validating - # messages can be encoded to JSON: - if PY3: - capture = lambda f : capture_logging(None, encoder_=BytesJSONEncoder)(f) - else: - capture = lambda f : capture_logging(None)(f) - - @wraps(f) - def run_and_republish(self, *a, **kw): - # Unfortunately the only way to get at the global/default logger... - # This import is delayed here so that we get the *current* default - # logger at the time the decorated function is run. - from eliot._output import _DEFAULT_LOGGER as default_logger - - def republish(): - # This is called as a cleanup function after capture_logging has - # restored the global/default logger to its original state. We - # can now emit messages that go to whatever global destinations - # are installed. - - # storage.logger.serialize() seems like it would make more sense - # than storage.logger.messages here. However, serialize() - # explodes, seemingly as a result of double-serializing the logged - # messages. I don't understand this. - for msg in storage.logger.messages: - default_logger.write(msg) - - # And now that we've re-published all of the test's messages, we - # can finish the test's action. - storage.action.finish() - - @capture - def run(self, logger): - # Record the MemoryLogger for later message extraction. - storage.logger = logger - # Give the test access to the logger as well. It would be just - # fine to pass this as a keyword argument to `f` but implementing - # that now will give me conflict headaches so I'm not doing it. - self.eliot_logger = logger - return f(self, *a, **kw) - - # Arrange for all messages written to the memory logger that - # `capture_logging` installs to be re-written to the global/default - # logger so they might end up in a log file somewhere, if someone - # wants. This has to be done in a cleanup function (or later) because - # capture_logging restores the original logger in a cleanup function. - # We install our cleanup function here, before we call run, so that it - # runs *after* the cleanup function capture_logging installs (cleanup - # functions are a stack). - self.addCleanup(republish) - - # Begin an action that should comprise all messages from the decorated - # test method. - with RUN_TEST(name=self.id()).context() as action: - # When the test method Deferred fires, the RUN_TEST action is - # done. However, we won't have re-published the MemoryLogger - # messages into the global/default logger when this Deferred - # fires. So we need to delay finishing the action until that has - # happened. Record the action so we can do that. - storage.action = action - - # Support both Deferred-returning and non-Deferred-returning - # tests. - d = maybeDeferred(run, self) - - # Let the test runner do its thing. - return d - - return run_and_republish +# On Python 3, we want to use our custom JSON encoder when validating messages +# can be encoded to JSON: +if PY2: + _memory_logger = MemoryLogger +else: + _memory_logger = lambda: MemoryLogger(encoder=BytesJSONEncoder) @attr.s @@ -174,10 +99,83 @@ class EliotLoggedRunTest(object): def id(self): return self.case.id() - @eliot_logged_test - def run(self, result=None): - return self._run_tests_with_factory( - self.case, - self.handlers, - self.last_resort, - ).run(result) + def run(self, result): + """ + Run the test case in the context of a distinct Eliot action. + + The action will finish after the test is done. It will note the name of + the test being run. + + All messages emitted by the test will be validated. They will still be + delivered to the global logger. + """ + # The idea here is to decorate the test method itself so that all of + # the extra logic happens at the point where test/application logic is + # expected to be. This `run` method is more like test infrastructure + # and things do not go well when we add too much extra behavior here. + # For example, exceptions raised here often just kill the whole + # runner. + + # So, grab the test method. + name = self.case._testMethodName + original = getattr(self.case, name) + try: + # Patch in a decorated version of it. + setattr(self.case, name, with_logging(ensure_text(self.case.id()), original)) + # Then use the rest of the machinery to run it. + return self._run_tests_with_factory( + self.case, + self.handlers, + self.last_resort, + ).run(result) + finally: + # Clean up the patching for idempotency or something. + delattr(self.case, name) + + +def with_logging( + test_id, # type: unicode + test_method, # type: Callable +): + """ + Decorate a test method with additional log-related behaviors. + + 1. The test method will run in a distinct Eliot action. + 2. Typed log messages will be validated. + 3. Logged tracebacks will be added as errors. + + :param test_id: The full identifier of the test being decorated. + :param test_method: The method itself. + """ + @wraps(test_method) + def run_with_logging(*args, **kwargs): + validating_logger = _memory_logger() + original = swap_logger(None) + try: + swap_logger(_TwoLoggers(original, validating_logger)) + with RUN_TEST(name=test_id): + try: + return test_method(*args, **kwargs) + finally: + check_for_errors(validating_logger) + finally: + swap_logger(original) + return run_with_logging + + +class _TwoLoggers(object): + """ + Log to two loggers. + + A single logger can have multiple destinations so this isn't typically a + useful thing to do. However, MemoryLogger has inline validation instead + of destinations. That means this *is* useful to simultaneously write to + the normal places and validate all written log messages. + """ + def __init__(self, a, b): + self._a = a + self._b = b + + def write(self, *args, **kwargs): + self._a.write(*args, **kwargs) + self._b.write(*args, **kwargs) diff --git a/src/allmydata/test/test_eliotutil.py b/src/allmydata/test/test_eliotutil.py index 0073a7675..aca677323 100644 --- a/src/allmydata/test/test_eliotutil.py +++ b/src/allmydata/test/test_eliotutil.py @@ -18,17 +18,25 @@ if PY2: from sys import stdout import logging +from unittest import ( + skip, +) + from fixtures import ( TempDir, ) from testtools import ( TestCase, ) +from testtools import ( + TestResult, +) from testtools.matchers import ( Is, IsInstance, MatchesStructure, Equals, + HasLength, AfterPreprocessing, ) from testtools.twistedsupport import ( @@ -38,12 +46,16 @@ from testtools.twistedsupport import ( from eliot import ( Message, + MessageType, + fields, FileDestination, + MemoryLogger, ) from eliot.twisted import DeferredContext from eliot.testing import ( capture_logging, assertHasAction, + swap_logger, ) from twisted.internet.defer import ( @@ -173,6 +185,62 @@ class EliotLoggingTests(TestCase): ), ) + def test_validation_failure(self): + """ + If a test emits a log message that fails validation then an error is added + to the result. + """ + # Make sure we preserve the original global Eliot state. + original = swap_logger(MemoryLogger()) + self.addCleanup(lambda: swap_logger(original)) + + class ValidationFailureProbe(SyncTestCase): + def test_bad_message(self): + # This message does not validate because "Hello" is not an + # int. + MSG = MessageType("test:eliotutil", fields(foo=int)) + MSG(foo="Hello").write() + + result = TestResult() + case = ValidationFailureProbe("test_bad_message") + case.run(result) + + self.assertThat( + result.errors, + HasLength(1), + ) + + def test_skip_cleans_up(self): + """ + After a skipped test the global Eliot logging state is restored. + """ + # Save the logger that's active before we do anything so that we can + # restore it later. Also install another logger so we can compare it + # to the active logger later. + expected = MemoryLogger() + original = swap_logger(expected) + + # Restore it, whatever else happens. + self.addCleanup(lambda: swap_logger(original)) + + class SkipProbe(SyncTestCase): + @skip("It's a skip test.") + def test_skipped(self): + pass + + case = SkipProbe("test_skipped") + case.run() + + # Retrieve the logger that's active now that the skipped test is done + # so we can check it against the expected value. + actual = swap_logger(MemoryLogger()) + self.assertThat( + actual, + Is(expected), + ) + + + class LogCallDeferredTests(TestCase): """ Tests for ``log_call_deferred``. From 740fe9fef7f83bb4de0bdbf79f33560d9ca909dc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 27 Jan 2021 11:36:53 -0500 Subject: [PATCH 76/93] news fragment --- newsfragments/3600.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3600.minor diff --git a/newsfragments/3600.minor b/newsfragments/3600.minor new file mode 100644 index 000000000..e69de29bb From 81edda80115c0bfcb97bbb35ad59c5817f27df65 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 28 Jan 2021 15:11:13 -0500 Subject: [PATCH 77/93] ... this? --- src/allmydata/test/eliotutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/eliotutil.py b/src/allmydata/test/eliotutil.py index 3579eb31f..f91d76e03 100644 --- a/src/allmydata/test/eliotutil.py +++ b/src/allmydata/test/eliotutil.py @@ -134,7 +134,7 @@ class EliotLoggedRunTest(object): def with_logging( - test_id, # type: unicode + test_id, # type: str test_method, # type: Callable ): """ From dfffa8722a57dd8705ddff45dcbb2794e7d4bedf Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 3 Feb 2021 11:45:30 -0500 Subject: [PATCH 78/93] Get rid of allmydata.web.common_py3. --- src/allmydata/web/common.py | 136 +++++++++++++++++++++++++++++- src/allmydata/web/common_py3.py | 143 -------------------------------- src/allmydata/web/storage.py | 2 +- 3 files changed, 133 insertions(+), 148 deletions(-) delete mode 100644 src/allmydata/web/common_py3.py diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index 0a39d7336..03371d092 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -1,6 +1,11 @@ from past.builtins import unicode from six import ensure_text, ensure_str +try: + from typing import Optional +except ImportError: + pass + import time import json from functools import wraps @@ -74,11 +79,13 @@ from allmydata.util.encodingutil import ( quote_output, to_bytes, ) +from allmydata.util import abbreviate -# Originally part of this module, so still part of its API: -from .common_py3 import ( # noqa: F401 - get_arg, abbreviate_time, MultiFormatResource, WebError, -) + +class WebError(Exception): + def __init__(self, text, code=http.BAD_REQUEST): + self.text = text + self.code = code def get_filenode_metadata(filenode): @@ -689,3 +696,124 @@ def url_for_string(req, url_string): port=port, ) return url + + +def get_arg(req, argname, default=None, multiple=False): + """Extract an argument from either the query args (req.args) or the form + body fields (req.fields). If multiple=False, this returns a single value + (or the default, which defaults to None), and the query args take + precedence. If multiple=True, this returns a tuple of arguments (possibly + empty), starting with all those in the query args. + + :param TahoeLAFSRequest req: The request to consider. + + :return: Either bytes or tuple of bytes. + """ + if isinstance(argname, unicode): + argname = argname.encode("utf-8") + if isinstance(default, unicode): + default = default.encode("utf-8") + results = [] + if argname in req.args: + results.extend(req.args[argname]) + argname_unicode = unicode(argname, "utf-8") + if req.fields and argname_unicode in req.fields: + value = req.fields[argname_unicode].value + if isinstance(value, unicode): + value = value.encode("utf-8") + results.append(value) + if multiple: + return tuple(results) + if results: + return results[0] + return default + + +class MultiFormatResource(resource.Resource, object): + """ + ``MultiFormatResource`` is a ``resource.Resource`` that can be rendered in + a number of different formats. + + Rendered format is controlled by a query argument (given by + ``self.formatArgument``). Different resources may support different + formats but ``json`` is a pretty common one. ``html`` is the default + format if nothing else is given as the ``formatDefault``. + """ + formatArgument = "t" + formatDefault = None # type: Optional[str] + + def render(self, req): + """ + Dispatch to a renderer for a particular format, as selected by a query + argument. + + A renderer for the format given by the query argument matching + ``formatArgument`` will be selected and invoked. render_HTML will be + used as a default if no format is selected (either by query arguments + or by ``formatDefault``). + + :return: The result of the selected renderer. + """ + t = get_arg(req, self.formatArgument, self.formatDefault) + # It's either bytes or None. + if isinstance(t, bytes): + t = unicode(t, "ascii") + renderer = self._get_renderer(t) + result = renderer(req) + # On Python 3, json.dumps() returns Unicode for example, but + # twisted.web expects bytes. Instead of updating every single render + # method, just handle Unicode one time here. + if isinstance(result, unicode): + result = result.encode("utf-8") + return result + + def _get_renderer(self, fmt): + """ + Get the renderer for the indicated format. + + :param str fmt: The format. If a method with a prefix of ``render_`` + and a suffix of this format (upper-cased) is found, it will be + used. + + :return: A callable which takes a twisted.web Request and renders a + response. + """ + renderer = None + + if fmt is not None: + try: + renderer = getattr(self, "render_{}".format(fmt.upper())) + except AttributeError: + return resource.ErrorPage( + http.BAD_REQUEST, + "Bad Format", + "Unknown {} value: {!r}".format(self.formatArgument, fmt), + ).render + + if renderer is None: + renderer = self.render_HTML + + return renderer + + +def abbreviate_time(data): + """ + Convert number of seconds into human readable string. + + :param data: Either ``None`` or integer or float, seconds. + + :return: Unicode string. + """ + # 1.23s, 790ms, 132us + if data is None: + return u"" + s = float(data) + if s >= 10: + return abbreviate.abbreviate_time(data) + if s >= 1.0: + return u"%.2fs" % s + if s >= 0.01: + return u"%.0fms" % (1000*s) + if s >= 0.001: + return u"%.1fms" % (1000*s) + return u"%.0fus" % (1000000*s) diff --git a/src/allmydata/web/common_py3.py b/src/allmydata/web/common_py3.py deleted file mode 100644 index 439d346fa..000000000 --- a/src/allmydata/web/common_py3.py +++ /dev/null @@ -1,143 +0,0 @@ -""" -Common utilities that are available from Python 3. - -Can eventually be merged back into allmydata.web.common. -""" - -from past.builtins import unicode - -try: - from typing import Optional -except ImportError: - pass - -from twisted.web import resource, http - -from allmydata.util import abbreviate - - -class WebError(Exception): - def __init__(self, text, code=http.BAD_REQUEST): - self.text = text - self.code = code - - -def get_arg(req, argname, default=None, multiple=False): - """Extract an argument from either the query args (req.args) or the form - body fields (req.fields). If multiple=False, this returns a single value - (or the default, which defaults to None), and the query args take - precedence. If multiple=True, this returns a tuple of arguments (possibly - empty), starting with all those in the query args. - - :param TahoeLAFSRequest req: The request to consider. - - :return: Either bytes or tuple of bytes. - """ - if isinstance(argname, unicode): - argname = argname.encode("utf-8") - if isinstance(default, unicode): - default = default.encode("utf-8") - results = [] - if argname in req.args: - results.extend(req.args[argname]) - argname_unicode = unicode(argname, "utf-8") - if req.fields and argname_unicode in req.fields: - value = req.fields[argname_unicode].value - if isinstance(value, unicode): - value = value.encode("utf-8") - results.append(value) - if multiple: - return tuple(results) - if results: - return results[0] - return default - - -class MultiFormatResource(resource.Resource, object): - """ - ``MultiFormatResource`` is a ``resource.Resource`` that can be rendered in - a number of different formats. - - Rendered format is controlled by a query argument (given by - ``self.formatArgument``). Different resources may support different - formats but ``json`` is a pretty common one. ``html`` is the default - format if nothing else is given as the ``formatDefault``. - """ - formatArgument = "t" - formatDefault = None # type: Optional[str] - - def render(self, req): - """ - Dispatch to a renderer for a particular format, as selected by a query - argument. - - A renderer for the format given by the query argument matching - ``formatArgument`` will be selected and invoked. render_HTML will be - used as a default if no format is selected (either by query arguments - or by ``formatDefault``). - - :return: The result of the selected renderer. - """ - t = get_arg(req, self.formatArgument, self.formatDefault) - # It's either bytes or None. - if isinstance(t, bytes): - t = unicode(t, "ascii") - renderer = self._get_renderer(t) - result = renderer(req) - # On Python 3, json.dumps() returns Unicode for example, but - # twisted.web expects bytes. Instead of updating every single render - # method, just handle Unicode one time here. - if isinstance(result, unicode): - result = result.encode("utf-8") - return result - - def _get_renderer(self, fmt): - """ - Get the renderer for the indicated format. - - :param str fmt: The format. If a method with a prefix of ``render_`` - and a suffix of this format (upper-cased) is found, it will be - used. - - :return: A callable which takes a twisted.web Request and renders a - response. - """ - renderer = None - - if fmt is not None: - try: - renderer = getattr(self, "render_{}".format(fmt.upper())) - except AttributeError: - return resource.ErrorPage( - http.BAD_REQUEST, - "Bad Format", - "Unknown {} value: {!r}".format(self.formatArgument, fmt), - ).render - - if renderer is None: - renderer = self.render_HTML - - return renderer - - -def abbreviate_time(data): - """ - Convert number of seconds into human readable string. - - :param data: Either ``None`` or integer or float, seconds. - - :return: Unicode string. - """ - # 1.23s, 790ms, 132us - if data is None: - return u"" - s = float(data) - if s >= 10: - return abbreviate.abbreviate_time(data) - if s >= 1.0: - return u"%.2fs" % s - if s >= 0.01: - return u"%.0fms" % (1000*s) - if s >= 0.001: - return u"%.1fms" % (1000*s) - return u"%.0fus" % (1000000*s) diff --git a/src/allmydata/web/storage.py b/src/allmydata/web/storage.py index 82c789d9b..9452c2a4e 100644 --- a/src/allmydata/web/storage.py +++ b/src/allmydata/web/storage.py @@ -9,7 +9,7 @@ from twisted.web.template import ( renderer, renderElement ) -from allmydata.web.common_py3 import ( +from allmydata.web.common import ( abbreviate_time, MultiFormatResource ) From 8fdbb6db6eecb038fe1ecbfa2e261f0efe4266e4 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 4 Feb 2021 11:22:48 -0500 Subject: [PATCH 79/93] Nail down types. --- src/allmydata/dirnode.py | 13 ++++++++++--- src/allmydata/test/test_dirnode.py | 4 ++-- src/allmydata/test/web/test_util.py | 11 ++++++----- src/allmydata/web/common.py | 21 +++++++++++---------- 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py index 6871b94c7..85e2d54c4 100644 --- a/src/allmydata/dirnode.py +++ b/src/allmydata/dirnode.py @@ -74,6 +74,13 @@ ADD_FILE = ActionType( u"Add a new file as a child of a directory.", ) + +class _OnlyFiles(object): + """Marker for replacement option of only replacing files.""" + +ONLY_FILES = _OnlyFiles() + + def update_metadata(metadata, new_metadata, now): """Updates 'metadata' in-place with the information in 'new_metadata'. @@ -179,7 +186,7 @@ class Adder(object): if entries is None: entries = {} precondition(isinstance(entries, dict), entries) - precondition(overwrite in (True, False, "only-files"), overwrite) + precondition(overwrite in (True, False, ONLY_FILES), overwrite) # keys of 'entries' may not be normalized. self.entries = entries self.overwrite = overwrite @@ -205,7 +212,7 @@ class Adder(object): if not self.overwrite: raise ExistingChildError("child %s already exists" % quote_output(name, encoding='utf-8')) - if self.overwrite == "only-files" and IDirectoryNode.providedBy(children[name][0]): + if self.overwrite == ONLY_FILES and IDirectoryNode.providedBy(children[name][0]): raise ExistingChildError("child %s already exists as a directory" % quote_output(name, encoding='utf-8')) metadata = children[name][1].copy() @@ -701,7 +708,7 @@ class DirectoryNode(object): 'new_child_namex' and 'current_child_namex' need not be normalized. The overwrite parameter may be True (overwrite any existing child), - False (error if the new child link already exists), or "only-files" + False (error if the new child link already exists), or ONLY_FILES (error if the new child link exists and points to a directory). """ if self.is_readonly() or new_parent.is_readonly(): diff --git a/src/allmydata/test/test_dirnode.py b/src/allmydata/test/test_dirnode.py index 8e5e59b46..23a0fd76b 100644 --- a/src/allmydata/test/test_dirnode.py +++ b/src/allmydata/test/test_dirnode.py @@ -1978,12 +1978,12 @@ class Adder(GridTestMixin, unittest.TestCase, testutil.ShouldFailMixin): overwrite=False)) d.addCallback(lambda res: root_node.set_node(u'file1', filenode, - overwrite="only-files")) + overwrite=dirnode.ONLY_FILES)) d.addCallback(lambda res: self.shouldFail(ExistingChildError, "set_node", "child 'dir1' already exists", root_node.set_node, u'dir1', filenode, - overwrite="only-files")) + overwrite=dirnode.ONLY_FILES)) return d d.addCallback(_test_adder) diff --git a/src/allmydata/test/web/test_util.py b/src/allmydata/test/web/test_util.py index 5f4d6bb88..c536dc9f1 100644 --- a/src/allmydata/test/web/test_util.py +++ b/src/allmydata/test/web/test_util.py @@ -12,17 +12,18 @@ if PY2: from twisted.trial import unittest from allmydata.web import status, common +from allmydata.dirnode import ONLY_FILES from ..common import ShouldFailMixin from .. import common_util as testutil class Util(ShouldFailMixin, testutil.ReallyEqualMixin, unittest.TestCase): def test_parse_replace_arg(self): - self.failUnlessReallyEqual(common.parse_replace_arg("true"), True) - self.failUnlessReallyEqual(common.parse_replace_arg("false"), False) - self.failUnlessReallyEqual(common.parse_replace_arg("only-files"), - "only-files") - self.failUnlessRaises(common.WebError, common.parse_replace_arg, "only_fles") + self.failUnlessReallyEqual(common.parse_replace_arg(b"true"), True) + self.failUnlessReallyEqual(common.parse_replace_arg(b"false"), False) + self.failUnlessReallyEqual(common.parse_replace_arg(b"only-files"), + ONLY_FILES) + self.failUnlessRaises(common.WebError, common.parse_replace_arg, b"only_fles") def test_abbreviate_time(self): self.failUnlessReallyEqual(common.abbreviate_time(None), "") diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index 03371d092..3a1f9ef53 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -2,7 +2,7 @@ from past.builtins import unicode from six import ensure_text, ensure_str try: - from typing import Optional + from typing import Optional, Union except ImportError: pass @@ -56,6 +56,7 @@ from twisted.web.resource import ( IResource, ) +from allmydata.dirnode import ONLY_FILES, _OnlyFiles from allmydata import blacklist from allmydata.interfaces import ( EmptyPathnameComponentError, @@ -105,17 +106,17 @@ def get_filenode_metadata(filenode): metadata['size'] = size return metadata -def boolean_of_arg(arg): - # TODO: "" - arg = ensure_text(arg) - if arg.lower() not in ("true", "t", "1", "false", "f", "0", "on", "off"): +def boolean_of_arg(arg): # type: (bytes) -> bool + assert isinstance(arg, bytes) + if arg.lower() not in (b"true", b"t", b"1", b"false", b"f", b"0", b"on", b"off"): raise WebError("invalid boolean argument: %r" % (arg,), http.BAD_REQUEST) - return arg.lower() in ("true", "t", "1", "on") + return arg.lower() in (b"true", b"t", b"1", b"on") -def parse_replace_arg(replace): - replace = ensure_text(replace) - if replace.lower() == "only-files": - return replace + +def parse_replace_arg(replace): # type: (bytes) -> Union[bool,_OnlyFiles] + assert isinstance(replace, bytes) + if replace.lower() == b"only-files": + return ONLY_FILES try: return boolean_of_arg(replace) except WebError: From c44b46e0a6431d06c503a802f24474625bad4e01 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 4 Feb 2021 11:44:26 -0500 Subject: [PATCH 80/93] More type annotation. --- src/allmydata/web/common.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index 3a1f9ef53..2d1c51b47 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -153,19 +153,19 @@ def get_mutable_type(file_format): # accepts result of get_format() return None -def parse_offset_arg(offset): +def parse_offset_arg(offset): # type: (bytes) -> Union[int,None] # XXX: This will raise a ValueError when invoked on something that # is not an integer. Is that okay? Or do we want a better error # message? Since this call is going to be used by programmers and # their tools rather than users (through the wui), it is not # inconsistent to return that, I guess. if offset is not None: - offset = int(offset) + return int(offset) return offset -def get_root(req): +def get_root(req): # type: (IRequest) -> unicode """ Get a relative path with parent directory segments that refers to the root location known to the given request. This seems a lot like the constant @@ -699,7 +699,7 @@ def url_for_string(req, url_string): return url -def get_arg(req, argname, default=None, multiple=False): +def get_arg(req, argname, default=None, multiple=False): # type (IRequest, Union[bytes,unicode], Any, bool) -> Union[bytes,Tuple[bytes]] """Extract an argument from either the query args (req.args) or the form body fields (req.fields). If multiple=False, this returns a single value (or the default, which defaults to None), and the query args take From 92f5001596de497764f1737e801e57dd564d3e37 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 4 Feb 2021 14:10:22 -0500 Subject: [PATCH 81/93] Port to Python 3. --- src/allmydata/util/_python3.py | 1 + src/allmydata/web/common.py | 50 ++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/src/allmydata/util/_python3.py b/src/allmydata/util/_python3.py index 066928ca4..2f0cebbba 100644 --- a/src/allmydata/util/_python3.py +++ b/src/allmydata/util/_python3.py @@ -115,6 +115,7 @@ PORTED_MODULES = [ "allmydata.util.spans", "allmydata.util.statistics", "allmydata.util.time_format", + "allmydata.web.common", "allmydata.web.logs", "allmydata.webish", ] diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index 2d1c51b47..5309c2612 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -1,8 +1,20 @@ -from past.builtins import unicode -from six import ensure_text, ensure_str +""" +Ported to Python 3. +""" +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 + from past.builtins import unicode as str # prevent leaking newbytes/newstr into code that can't handle it + +from six import ensure_str try: - from typing import Optional, Union + from typing import Optional, Union, Tuple, Any except ImportError: pass @@ -165,7 +177,7 @@ def parse_offset_arg(offset): # type: (bytes) -> Union[int,None] return offset -def get_root(req): # type: (IRequest) -> unicode +def get_root(req): # type: (IRequest) -> str """ Get a relative path with parent directory segments that refers to the root location known to the given request. This seems a lot like the constant @@ -194,8 +206,8 @@ def convert_children_json(nodemaker, children_json): children = {} if children_json: data = json.loads(children_json) - for (namex, (ctype, propdict)) in data.items(): - namex = unicode(namex) + for (namex, (ctype, propdict)) in list(data.items()): + namex = str(namex) writecap = to_bytes(propdict.get("rw_uri")) readcap = to_bytes(propdict.get("ro_uri")) metadata = propdict.get("metadata", {}) @@ -216,7 +228,8 @@ def compute_rate(bytes, seconds): assert bytes > -1 assert seconds > 0 - return 1.0 * bytes / seconds + return bytes / seconds + def abbreviate_rate(data): """ @@ -237,6 +250,7 @@ def abbreviate_rate(data): return u"%.1fkBps" % (r/1000) return u"%.0fBps" % r + def abbreviate_size(data): """ Convert number of bytes into human readable strings (unicode). @@ -273,7 +287,7 @@ def text_plain(text, req): return text def spaces_to_nbsp(text): - return unicode(text).replace(u' ', u'\u00A0') + return str(text).replace(u' ', u'\u00A0') def render_time_delta(time_1, time_2): return spaces_to_nbsp(format_delta(time_1, time_2)) @@ -291,7 +305,7 @@ def render_time_attr(t): # actual exception). The latter is growing increasingly annoying. def should_create_intermediate_directories(req): - t = unicode(get_arg(req, "t", "").strip(), "ascii") + t = str(get_arg(req, "t", "").strip(), "ascii") return bool(req.method in (b"PUT", b"POST") and t not in ("delete", "rename", "rename-form", "check")) @@ -573,7 +587,7 @@ def _finish(result, render, request): resource=fullyQualifiedName(type(result)), ) result.render(request) - elif isinstance(result, unicode): + elif isinstance(result, str): Message.log( message_type=u"allmydata:web:common-render:unicode", ) @@ -655,7 +669,7 @@ def _renderHTTP_exception(request, failure): def _renderHTTP_exception_simple(request, text, code): request.setResponseCode(code) request.setHeader("content-type", "text/plain;charset=utf-8") - if isinstance(text, unicode): + if isinstance(text, str): text = text.encode("utf-8") request.setHeader("content-length", b"%d" % len(text)) return text @@ -699,7 +713,7 @@ def url_for_string(req, url_string): return url -def get_arg(req, argname, default=None, multiple=False): # type (IRequest, Union[bytes,unicode], Any, bool) -> Union[bytes,Tuple[bytes]] +def get_arg(req, argname, default=None, multiple=False): # type: (IRequest, Union[bytes,str], Any, bool) -> Union[bytes,Tuple[bytes],Any] """Extract an argument from either the query args (req.args) or the form body fields (req.fields). If multiple=False, this returns a single value (or the default, which defaults to None), and the query args take @@ -710,17 +724,17 @@ def get_arg(req, argname, default=None, multiple=False): # type (IRequest, Unio :return: Either bytes or tuple of bytes. """ - if isinstance(argname, unicode): + if isinstance(argname, str): argname = argname.encode("utf-8") - if isinstance(default, unicode): + if isinstance(default, str): default = default.encode("utf-8") results = [] if argname in req.args: results.extend(req.args[argname]) - argname_unicode = unicode(argname, "utf-8") + argname_unicode = str(argname, "utf-8") if req.fields and argname_unicode in req.fields: value = req.fields[argname_unicode].value - if isinstance(value, unicode): + if isinstance(value, str): value = value.encode("utf-8") results.append(value) if multiple: @@ -758,13 +772,13 @@ class MultiFormatResource(resource.Resource, object): t = get_arg(req, self.formatArgument, self.formatDefault) # It's either bytes or None. if isinstance(t, bytes): - t = unicode(t, "ascii") + t = str(t, "ascii") renderer = self._get_renderer(t) result = renderer(req) # On Python 3, json.dumps() returns Unicode for example, but # twisted.web expects bytes. Instead of updating every single render # method, just handle Unicode one time here. - if isinstance(result, unicode): + if isinstance(result, str): result = result.encode("utf-8") return result From e3bb368184d0037768d8d33edd6f80877d374329 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 4 Feb 2021 14:10:41 -0500 Subject: [PATCH 82/93] News file. --- newsfragments/3607.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3607.minor diff --git a/newsfragments/3607.minor b/newsfragments/3607.minor new file mode 100644 index 000000000..e69de29bb From 9b2a9e14ae1b1ceaab4f08de634d5cfe1d97a41d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 9 Feb 2021 21:21:31 -0500 Subject: [PATCH 83/93] Re-add the check so we still get early failure if this ever happens --- src/allmydata/util/encodingutil.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 28458c9dc..483871b5d 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -69,6 +69,7 @@ def _reload(): global filesystem_encoding, is_unicode_platform, use_unicode_filepath filesystem_encoding = canonical_encoding(sys.getfilesystemencoding()) + check_encoding(filesystem_encoding) is_unicode_platform = PY3 or sys.platform in ["win32", "darwin"] # Despite the Unicode-mode FilePath support added to Twisted in From 5568170c243ab717221571ca61a48c9a717dadbf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 11 Feb 2021 15:46:04 -0500 Subject: [PATCH 84/93] Slightly better docs for the share count limits on convergence hash tag --- src/allmydata/util/hashutil.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index f82c04efd..8525dd95e 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -180,8 +180,8 @@ def _convergence_hasher_tag(k, n, segsize, convergence): """ Create the convergence hashing tag. - :param int k: Required shares. - :param int n: Total shares. + :param int k: Required shares (in [1..256]). + :param int n: Total shares (in [1..256]). :param int segsize: Maximum segment size. :param bytes convergence: The convergence secret. @@ -193,10 +193,17 @@ def _convergence_hasher_tag(k, n, segsize, convergence): "k > n not allowed; k = {}, n = {}".format(k, n), ) if k < 1 or n < 1: + # It doesn't make sense to have zero shares. Zero shares carry no + # information, cannot encode any part of the application data. raise ValueError( "k, n < 1 not allowed; k = {}, n = {}".format(k, n), ) if k > 256 or n > 256: + # ZFEC supports encoding application data into a maximum of 256 + # shares. If we ignore the limitations of ZFEC, it may be fine to use + # a configuration with more shares than that and it may be fine to + # construct a convergence tag from such a configuration. Since ZFEC + # is the only supported encoder, though, this is moot for now. raise ValueError( "k, n > 256 not allowed; k = {}, n = {}".format(k, n), ) From 451ede2666318cec793c70fd7ce9d279a20d6556 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 11 Feb 2021 15:58:28 -0500 Subject: [PATCH 85/93] news fragment --- newsfragments/3326.installation | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3326.installation diff --git a/newsfragments/3326.installation b/newsfragments/3326.installation new file mode 100644 index 000000000..2a3a64e32 --- /dev/null +++ b/newsfragments/3326.installation @@ -0,0 +1 @@ +Debian 8 support has been replaced with Debian 10 support. From 7f224414233c2c82317c1bf2bda005602787d327 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 11 Feb 2021 15:59:51 -0500 Subject: [PATCH 86/93] Change Debian 8 to Debian 10 for CI --- .circleci/config.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 29b55ad5f..42e47957c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,7 +29,7 @@ workflows: - "debian-9": &DOCKERHUB_CONTEXT context: "dockerhub-auth" - - "debian-8": + - "debian-10": <<: *DOCKERHUB_CONTEXT requires: - "debian-9" @@ -107,7 +107,7 @@ workflows: - "master" jobs: - - "build-image-debian-8": + - "build-image-debian-10": <<: *DOCKERHUB_CONTEXT - "build-image-debian-9": <<: *DOCKERHUB_CONTEXT @@ -277,11 +277,11 @@ jobs: fi - debian-8: + debian-10: <<: *DEBIAN docker: - <<: *DOCKERHUB_AUTH - image: "tahoelafsci/debian:8-py2.7" + image: "tahoelafsci/debian:10-py2.7" user: "nobody" @@ -529,12 +529,12 @@ jobs: docker push tahoelafsci/${DISTRO}:${TAG}-py${PYTHON_VERSION} - build-image-debian-8: + build-image-debian-10: <<: *BUILD_IMAGE environment: DISTRO: "debian" - TAG: "8" + TAG: "10" PYTHON_VERSION: "2.7" From a8b1c204d2b836e70d84f8ae9179fba523648059 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 11 Feb 2021 16:06:18 -0500 Subject: [PATCH 87/93] Mark the expected result literal as the correct type, bytes --- src/allmydata/test/test_hashutil.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_hashutil.py b/src/allmydata/test/test_hashutil.py index 6ac73ae60..482e79c0b 100644 --- a/src/allmydata/test/test_hashutil.py +++ b/src/allmydata/test/test_hashutil.py @@ -133,9 +133,9 @@ class HashUtilTests(unittest.TestCase): convergence secret. """ self.assertEqual( - "allmydata_immutable_content_to_key_with_added_secret_v1+" - "16:\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42," - "9:3,10,1024,", + b"allmydata_immutable_content_to_key_with_added_secret_v1+" + b"16:\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42," + b"9:3,10,1024,", hashutil._convergence_hasher_tag( k=3, n=10, From 145d6b63ee1f5349403c8303c4ac6cbf8efad4a5 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Feb 2021 09:48:33 -0500 Subject: [PATCH 88/93] Document trinary logic. --- src/allmydata/dirnode.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py index 85e2d54c4..69ff85cbd 100644 --- a/src/allmydata/dirnode.py +++ b/src/allmydata/dirnode.py @@ -182,6 +182,11 @@ class MetadataSetter(object): class Adder(object): def __init__(self, node, entries=None, overwrite=True, create_readonly_node=None): + """ + :param overwrite: Either True (allow overwriting anything existing), + False (don't allow overwriting), or ONLY_FILES (only files can be + overwritten). + """ self.node = node if entries is None: entries = {} From 9a9b4bb232549c0802c74593454e2451a62d9e3d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 10:14:23 -0500 Subject: [PATCH 89/93] Use a monkey patching library for monkey patching --- src/allmydata/test/eliotutil.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/eliotutil.py b/src/allmydata/test/eliotutil.py index f91d76e03..2a5c85ebf 100644 --- a/src/allmydata/test/eliotutil.py +++ b/src/allmydata/test/eliotutil.py @@ -36,6 +36,10 @@ from eliot.testing import ( check_for_errors, ) +from twisted.python.monkey import ( + MonkeyPatcher, +) + from ..util.jsonbytes import BytesJSONEncoder @@ -115,13 +119,16 @@ class EliotLoggedRunTest(object): # and things do not go well when we add too much extra behavior here. # For example, exceptions raised here often just kill the whole # runner. + patcher = MonkeyPatcher() # So, grab the test method. name = self.case._testMethodName original = getattr(self.case, name) + decorated = with_logging(ensure_text(self.case.id()), original) + patcher.addPatch(self.case, name, decorated) try: - # Patch in a decorated version of it. - setattr(self.case, name, with_logging(ensure_text(self.case.id()), original)) + # Patch it in + patcher.patch() # Then use the rest of the machinery to run it. return self._run_tests_with_factory( self.case, @@ -130,7 +137,7 @@ class EliotLoggedRunTest(object): ).run(result) finally: # Clean up the patching for idempotency or something. - delattr(self.case, name) + patcher.restore() def with_logging( From 585c554081be261daa43b8750819b74029c65252 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 10:22:17 -0500 Subject: [PATCH 90/93] Clarify _TwoLoggers --- src/allmydata/test/eliotutil.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/eliotutil.py b/src/allmydata/test/eliotutil.py index 2a5c85ebf..63c24f08a 100644 --- a/src/allmydata/test/eliotutil.py +++ b/src/allmydata/test/eliotutil.py @@ -26,10 +26,15 @@ from functools import ( import attr +from zope.interface import ( + implementer, +) + from eliot import ( ActionType, Field, MemoryLogger, + ILogger, ) from eliot.testing import ( swap_logger, @@ -170,6 +175,7 @@ def with_logging( return run_with_logging +@implementer(ILogger) class _TwoLoggers(object): """ Log to two loggers. @@ -180,9 +186,13 @@ class _TwoLoggers(object): the normal places and validate all written log messages. """ def __init__(self, a, b): - self._a = a - self._b = b + """ + :param ILogger a: One logger + :param ILogger b: Another logger + """ + self._a = a # type: ILogger + self._b = b # type: ILogger - def write(self, *args, **kwargs): - self._a.write(*args, **kwargs) - self._b.write(*args, **kwargs) + def write(self, dictionary, serializer=None): + self._a.write(dictionary, serializer) + self._b.write(dictionary, serializer) From 5aa452c8bbbb0e6a9a7e03e77859830d1b2adc4a Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Feb 2021 10:23:07 -0500 Subject: [PATCH 91/93] Drop build-porting-depgraph step. --- .circleci/config.yml | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 42e47957c..ad5360082 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -86,11 +86,6 @@ workflows: # integration tests. - "debian-9" - # Generate the underlying data for a visualization to aid with Python 3 - # porting. - - "build-porting-depgraph": - <<: *DOCKERHUB_CONTEXT - - "typechecks": <<: *DOCKERHUB_CONTEXT @@ -451,33 +446,6 @@ jobs: # them in parallel. nix-build --cores 3 --max-jobs 2 nix/ - # Generate up-to-date data for the dependency graph visualizer. - build-porting-depgraph: - # Get a system in which we can easily install Tahoe-LAFS and all its - # dependencies. The dependency graph analyzer works by executing the code. - # It's Python, what do you expect? - <<: *DEBIAN - - steps: - - "checkout" - - - add_ssh_keys: - fingerprints: - # Jean-Paul Calderone (CircleCI depgraph key) - # This lets us push to tahoe-lafs/tahoe-depgraph in the next step. - - "86:38:18:a7:c0:97:42:43:18:46:55:d6:21:b0:5f:d4" - - - run: - name: "Setup Python Environment" - command: | - /tmp/venv/bin/pip install -e /tmp/project - - - run: - name: "Generate dependency graph data" - command: | - . /tmp/venv/bin/activate - ./misc/python3/depgraph.sh - typechecks: docker: - <<: *DOCKERHUB_AUTH From 66b5a1577acd7d3757e4a8ceb638240c4ec45b87 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 12 Feb 2021 10:23:20 -0500 Subject: [PATCH 92/93] News file. --- newsfragments/3612.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3612.minor diff --git a/newsfragments/3612.minor b/newsfragments/3612.minor new file mode 100644 index 000000000..e69de29bb From 11e1fabbe4b1d9241223983ba3cb8e4416c984a0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 12 Feb 2021 13:10:31 -0500 Subject: [PATCH 93/93] Change the platform check to one mypy can recognize :/ --- src/allmydata/test/common.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index 3c9bfa7aa..230bca648 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -17,6 +17,7 @@ __all__ = [ from past.builtins import chr as byteschr, unicode +import sys import os, random, struct import six import tempfile @@ -52,9 +53,6 @@ from testtools.twistedsupport import ( flush_logged_errors, ) -from twisted.python.runtime import ( - platform, -) from twisted.application import service from twisted.plugin import IPlugin from twisted.internet import defer @@ -108,7 +106,7 @@ from .eliotutil import ( ) from .common_util import ShouldFailMixin # noqa: F401 -if platform.isWindows(): +if sys.platform == "win32": # Python 2.7 doesn't have good options for launching a process with # non-ASCII in its command line. So use this alternative that does a # better job. However, only use it on Windows because it doesn't work