From e937b062a1fa8b3597fcefe3514cc81c59952b8e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 15:56:42 -0500 Subject: [PATCH 001/201] news fragment --- newsfragments/3385.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3385.minor diff --git a/newsfragments/3385.minor b/newsfragments/3385.minor new file mode 100644 index 000000000..e69de29bb From 13bcd8170baf586b9b6a59273b6e924f312a6f0d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 15:57:10 -0500 Subject: [PATCH 002/201] Turn on Coveralls on GitHub actions --- .github/workflows/ci.yml | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fd5049104..d401a1f24 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,11 +59,41 @@ jobs: name: eliot.log path: eliot.log - - name: Upload coverage report - uses: codecov/codecov-action@v1 + # Upload this job's coverage data to Coveralls. + - name: "Report Coverage to Coveralls" + uses: "coverallsapp/github-action@v1.1.2" with: - token: abf679b6-e2e6-4b33-b7b5-6cfbd41ee691 - file: coverage.xml + github-token: "${{ secrets.github_token }}" + # Every source of coverage reports needs a unique "flag name". + # Construct one by smashing a few variables from the matrix together + # here. + flag-name: "run-${{ matrix.os }}-${{ matrix.python-version }}" + # Mark the data as just one piece of many because we have more than + # one instance of this job (Windows, macOS) which collects and + # reports coverage. This is necessary to cause Coveralls to merge + # multiple coverage results into a single report. + parallel: true + + # Tell Coveralls that we're done reporting coverage data. Since we're using + # the "parallel" mode where more than one coverage data file is merged into + # a single report, we have to tell Coveralls when we've uploaded all of the + # data files. This does it. We make sure it runs last by making it depend + # on *all* of the coverage-collecting jobs. + finish-coverage-report: + # There happens to just be one coverage-collecting job at the moment. If + # the coverage reports are broken and someone added more + # coverage-collecting jobs to this workflow but didn't update this, that's + # why. + needs: + - "coverage" + runs-on: "ubuntu-latest" + steps: + - name: "Finish Coveralls Reporting" + uses: "coverallsapp/github-action@v1.1.2" + with: + github-token: "${{ secrets.github_token }}" + # Here's the magic that tells Coveralls we're done. + parallel-finished: true integration: runs-on: ${{ matrix.os }} From b4128a8d10eac897794b517e867e34c11e4c551c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 15:58:21 -0500 Subject: [PATCH 003/201] Stop collecting coverage on CircleCI --- .circleci/config.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index afa3fafa1..8a1452714 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -210,7 +210,7 @@ jobs: # filenames and argv). LANG: "en_US.UTF-8" # Select a tox environment to run for this job. - TAHOE_LAFS_TOX_ENVIRONMENT: "py27-coverage" + TAHOE_LAFS_TOX_ENVIRONMENT: "py27" # Additional arguments to pass to tox. TAHOE_LAFS_TOX_ARGS: "" # The path in which test artifacts will be placed. 
@@ -220,7 +220,7 @@ jobs: WHEELHOUSE_PATH: &WHEELHOUSE_PATH "/tmp/wheelhouse" PIP_FIND_LINKS: "file:///tmp/wheelhouse" # Upload the coverage report. - UPLOAD_COVERAGE: "yes" + UPLOAD_COVERAGE: "" # pip cannot install packages if the working directory is not readable. # We want to run a lot of steps as nobody instead of as root. @@ -373,7 +373,7 @@ jobs: # this reporter on Python 3. So drop that and just specify the # reporter. TAHOE_LAFS_TRIAL_ARGS: "--reporter=subunitv2-file" - TAHOE_LAFS_TOX_ENVIRONMENT: "py36-coverage" + TAHOE_LAFS_TOX_ENVIRONMENT: "py36" ubuntu-20-04: From ca8f7d73f2ab29a0236432aaa024b94bf8768fe4 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 15:59:03 -0500 Subject: [PATCH 004/201] Stop using codecov --- .codecov.yml | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) delete mode 100644 .codecov.yml diff --git a/.codecov.yml b/.codecov.yml deleted file mode 100644 index 166190c5e..000000000 --- a/.codecov.yml +++ /dev/null @@ -1,48 +0,0 @@ -# Override defaults for codecov.io checks. -# -# Documentation is at https://docs.codecov.io/docs/codecov-yaml; -# reference is at https://docs.codecov.io/docs/codecovyml-reference. -# -# To validate this file, use: -# -# curl --data-binary @.codecov.yml https://codecov.io/validate -# -# Codecov's defaults seem to leave red marks in GitHub CI checks in a -# rather arbitrary manner, probably because of non-determinism in -# coverage (see https://tahoe-lafs.org/trac/tahoe-lafs/ticket/2891) -# and maybe because computers are bad with floating point numbers. - -# Allow coverage percentage a precision of zero decimals, and round to -# the nearest number (for example, 89.957 to to 90; 89.497 to 89%). -# Coverage above 90% is good, below 80% is bad. -coverage: - round: nearest - range: 80..90 - precision: 0 - - # Aim for a target test coverage of 90% in codecov/project check (do - # not allow project coverage to drop below that), and allow - # codecov/patch a threshold of 1% (allow coverage in changes to drop - # by that much, and no less). That should be good enough for us. - status: - project: - default: - target: 90% - threshold: 1% - patch: - default: - threshold: 1% - - -codecov: - # This is a public repository so supposedly we don't "need" to use an upload - # token. However, using one makes sure that CI jobs running against forked - # repositories have coverage uploaded to the right place in codecov so - # their reports aren't incomplete. - token: "abf679b6-e2e6-4b33-b7b5-6cfbd41ee691" - - notify: - # The reference documentation suggests that this is the default setting: - # https://docs.codecov.io/docs/codecovyml-reference#codecovnotifywait_for_ci - # However observation suggests otherwise. - wait_for_ci: true From 3fb412eda1c71db389c224b2f395cf57c5974500 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 16:20:41 -0500 Subject: [PATCH 005/201] Perhaps this is the correct github-token to use. The coveralls docs might be wrong. --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d401a1f24..3cc59124d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" uses: "coverallsapp/github-action@v1.1.2" with: - github-token: "${{ secrets.github_token }}" + github-token: "${{ secrets.GITHUB_TOKEN }}" # Every source of coverage reports needs a unique "flag name". 
# Construct one by smashing a few variables from the matrix together # here. @@ -91,7 +91,7 @@ jobs: - name: "Finish Coveralls Reporting" uses: "coverallsapp/github-action@v1.1.2" with: - github-token: "${{ secrets.github_token }}" + github-token: "${{ secrets.GITHUB_TOKEN }}" # Here's the magic that tells Coveralls we're done. parallel-finished: true From bebcca39f62c872107c27ae637bef9ed6c19ecaa Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 20:09:46 -0500 Subject: [PATCH 006/201] Switch to coveralls-python, maybe it works better --- .github/workflows/ci.yml | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3cc59124d..ec640fa88 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,18 +61,23 @@ jobs: # Upload this job's coverage data to Coveralls. - name: "Report Coverage to Coveralls" - uses: "coverallsapp/github-action@v1.1.2" - with: - github-token: "${{ secrets.GITHUB_TOKEN }}" + run: | + pip install coveralls + python -m coveralls + env: + # Some magic value required for some magic reason. + GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" + # Help coveralls identify our project. + COVERALLS_REPO_TOKEN: "JPf16rLB7T2yjgATIxFzTsEgMdN1UNq6o" # Every source of coverage reports needs a unique "flag name". # Construct one by smashing a few variables from the matrix together # here. - flag-name: "run-${{ matrix.os }}-${{ matrix.python-version }}" + COVERALLS_FLAG_NAME: "run-${{ matrix.os }}-${{ matrix.python-version }}" # Mark the data as just one piece of many because we have more than # one instance of this job (Windows, macOS) which collects and # reports coverage. This is necessary to cause Coveralls to merge # multiple coverage results into a single report. - parallel: true + COVERALLS_PARALLEL: true # Tell Coveralls that we're done reporting coverage data. Since we're using # the "parallel" mode where more than one coverage data file is merged into @@ -89,11 +94,14 @@ jobs: runs-on: "ubuntu-latest" steps: - name: "Finish Coveralls Reporting" - uses: "coverallsapp/github-action@v1.1.2" - with: - github-token: "${{ secrets.GITHUB_TOKEN }}" - # Here's the magic that tells Coveralls we're done. - parallel-finished: true + run: | + pip install coveralls + python -m coveralls --finish + env: + # Some magic value required for some magic reason. + GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" + # Help coveralls identify our project. + COVERALLS_REPO_TOKEN: "JPf16rLB7T2yjgATIxFzTsEgMdN1UNq6o" integration: runs-on: ${{ matrix.os }} From 9a8a61b74042a8870e83e4700a177627e02a528a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 20:31:32 -0500 Subject: [PATCH 007/201] Further tweaks to help the last step --- .github/workflows/ci.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ec640fa88..b89654c0d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,10 +92,14 @@ jobs: needs: - "coverage" runs-on: "ubuntu-latest" + # Get a Python 3 environment because only the Python 3 release of + # coveralls-python has the `--finish` flag... + container: "python:3-slim" steps: - name: "Finish Coveralls Reporting" run: | - pip install coveralls + # Also install wheel otherwise `docopt` may fail to build + pip install wheel coveralls python -m coveralls --finish env: # Some magic value required for some magic reason. 
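For reference, the reporting flow that the last few commits converge on amounts to two small shell steps. The sketch below is illustrative rather than the exact workflow text: it assumes COVERALLS_REPO_TOKEN (and, on GitHub Actions, GITHUB_TOKEN) are exported and that a tox run has already left a .coverage file in the working directory; the flag name is only a stand-in for the matrix-derived one.

    # One upload per job, marked as one piece of a parallel build.
    pip install coveralls
    COVERALLS_FLAG_NAME="run-windows-latest-2.7" \
    COVERALLS_PARALLEL=true \
        python -m coveralls

    # One "finish" call after every job has uploaded.  This needs the
    # Python 3 release of coveralls-python for the --finish flag; wheel
    # keeps the docopt build from failing.
    pip install wheel coveralls
    python -m coveralls --finish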
From f3aca51e35cbc04571f76347df64d2feafd80718 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 5 Jan 2021 20:52:02 -0500 Subject: [PATCH 008/201] run in debug mode to collect info for bug report --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b89654c0d..810b607af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -100,7 +100,7 @@ jobs: run: | # Also install wheel otherwise `docopt` may fail to build pip install wheel coveralls - python -m coveralls --finish + python -m coveralls debug --finish env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From 4b65751f5165b2c10febfee9848774a2b8aa591a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 09:00:16 -0500 Subject: [PATCH 009/201] Debug the earlier submissions to see what build_num they're providing --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 810b607af..1d244b4ce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" run: | pip install coveralls - python -m coveralls + python -m coveralls debug env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From adf3518fc1d768de9f6aec58e9fe8ddf52f80947 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 14:32:48 -0500 Subject: [PATCH 010/201] Okay I guess that served its purpose --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 1d244b4ce..810b607af 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" run: | pip install coveralls - python -m coveralls debug + python -m coveralls env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From f24cc5da0c0ac7b631335de9dbb3634b6d48d402 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 14:33:00 -0500 Subject: [PATCH 011/201] The angrier I am the more words I write --- .github/workflows/ci.yml | 77 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 71 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 810b607af..74454de6c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,15 +92,80 @@ jobs: needs: - "coverage" runs-on: "ubuntu-latest" - # Get a Python 3 environment because only the Python 3 release of - # coveralls-python has the `--finish` flag... - container: "python:3-slim" + # Get a runtime environment with curl so we can use it to send the HTTP + # request. + container: "curlimages/curl:7.74.0" steps: - name: "Finish Coveralls Reporting" run: | - # Also install wheel otherwise `docopt` may fail to build - pip install wheel coveralls - python -m coveralls debug --finish + # coveralls-python does have a `--finish` option but it doesn't seem + # to work, at least for us. + # https://github.com/coveralls-clients/coveralls-python/issues/248 + # + # But all it does is this simple POST so we can just send it + # ourselves. The only hard part is guessing what the POST + # parameters mean. + # + # Since the build is done I'm going to guess that "done" is a fine + # value for status. 
+ # + # That leaves "build_num". The coveralls documentation gives some + # hints about it. It suggests using $CIRCLE_WORKFLOW_ID if your job + # is on CircleCI. CircleCI documentation says this about + # CIRCLE_WORKFLOW_ID: + # + # A unique identifier for the workflow instance of the current + # job. This identifier is the same for every job in a given + # workflow instance. + # + # (from https://circleci.com/docs/2.0/env-vars/) + # + # A CircleCI workflow is roughly "the group of jobs run for a + # particular commit". There are exceptions to this but maybe we can + # ignore them. + # + # The only over hints we get from Coveralls about "build_num" are: + # + # * An example value of `1234` + # + # * Another example value of `$BUILD_NUMBER` where BUILD_NUMBER is + # not defined anywhere. + # + # Starting from the CircleCI workflow example, then, and looking at + # the environment variables GitHub Actions offers + # (https://docs.github.com/en/free-pro-team@latest/actions/reference/environment-variables#default-environment-variables) there are two of interest: + # + # * GITHUB_RUN_ID - A unique number for each run within a + # repository. This number does not change if you re-run the + # workflow run. + # + # * GITHUB_RUN_NUMBER - A unique number for each run of a particular + # workflow in a repository. This number begins at 1 for the + # workflow's first run, and increments with each new run. This + # number does not change if you re-run the workflow run. + # + # These seem to offer approximately the same value and only differ + # on whether they will be unique for the project as a whole or just + # for this particular workflow ("ci", as defined by this file). + # + # Unfortunately neither of them changes if a workflow is re-run. + # This differs from the behavior of CircleCI's CIRCLE_WORKFLOW_ID + # where a new value is assigned if a workflow is re-run. + # + # The consequence of this would seem to be that multiple runs of a + # GitHub Actions workflow will have their coverage reported to the + # same job. And since we eventually "finish" a job, later runs + # would be discarded (I suppose). + # + # There doesn't seem to be a way to do any better, though. + # + # However, we have the further constraint that our build_num must + # agree with whatever python-coveralls has selected. An inspection + # of the python-coveralls source reveals (perhaps unsurprisingly) + # its authors have selected GITHUB_RUN_ID. + # + # Thus, we select the same. + curl -k https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN -d "payload[build_num]=GITHUB_RUN_ID&payload[status]=done" env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From 8e8a7d82c642b91008d61ab9496465fb384d94ff Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 14:33:31 -0500 Subject: [PATCH 012/201] it was one or the other. turns out it was the other. --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 74454de6c..f15610333 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -160,8 +160,8 @@ jobs: # There doesn't seem to be a way to do any better, though. # # However, we have the further constraint that our build_num must - # agree with whatever python-coveralls has selected. An inspection - # of the python-coveralls source reveals (perhaps unsurprisingly) + # agree with whatever coveralls-python has selected. 
An inspection + # of the coveralls-python source reveals (perhaps unsurprisingly) # its authors have selected GITHUB_RUN_ID. # # Thus, we select the same. From 4d2782c1787838416df0963230aefde3371af631 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 14:57:31 -0500 Subject: [PATCH 013/201] Hahaha. Resolve the variable, don't just include its name. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f15610333..ff4a89ecd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -165,7 +165,7 @@ jobs: # its authors have selected GITHUB_RUN_ID. # # Thus, we select the same. - curl -k https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN -d "payload[build_num]=GITHUB_RUN_ID&payload[status]=done" + curl -k https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN -d "payload[build_num]=$GITHUB_RUN_ID&payload[status]=done" env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From 24a531474df7e3af3c21d37a287853832326bc7d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 15:38:12 -0500 Subject: [PATCH 014/201] So much for my ability to read and understand a Python program coveralls complained: {"error":"No build matching CI build number 467026020 found"} So try constructing a build_num that looks like the value we observed from `coveralls` output when it was submitting coverage data. --- .github/workflows/ci.yml | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ff4a89ecd..e9de9ba86 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -92,9 +92,6 @@ jobs: needs: - "coverage" runs-on: "ubuntu-latest" - # Get a runtime environment with curl so we can use it to send the HTTP - # request. - container: "curlimages/curl:7.74.0" steps: - name: "Finish Coveralls Reporting" run: | @@ -161,11 +158,25 @@ jobs: # # However, we have the further constraint that our build_num must # agree with whatever coveralls-python has selected. An inspection - # of the coveralls-python source reveals (perhaps unsurprisingly) - # its authors have selected GITHUB_RUN_ID. + # of the coveralls-python source suggests that GITHUB_RUN_ID is + # used. However, observation of the coveralls.io web interface + # suggests the value instead is something more like: + # + # $(git rev-parse refs/remotes/pull//merge)-PR- # # Thus, we select the same. - curl -k https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN -d "payload[build_num]=$GITHUB_RUN_ID&payload[status]=done" + # + # GITHUB_REF is a string like the rev being parsed above. We + # extract the PR number from it. + PR=$(echo $GITHUB_REF | cut -d / -f 4) + REV=$(git rev-parse $GITHUB_REF) + BUILD_NUM=$REV-PR-$PR + REPO_NAME=$GITHUB_REPOSITORY + curl \ + -k \ + https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN \ + -d \ + "payload[build_num]=$BUILD_NUM&payload[status]=done&payload[repo_name]=$REPO_NAME" env: # Some magic value required for some magic reason. 
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From 52c42b5118100982ae777c82607de4e51c588631 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 15:48:20 -0500 Subject: [PATCH 015/201] dump this info, who knows how many more rounds this will take --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e9de9ba86..2b213493e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" run: | pip install coveralls - python -m coveralls + python -m coveralls --verbose env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From fa863c94782f4011ab8d2c74051a6f0e3d3141d5 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 15:48:31 -0500 Subject: [PATCH 016/201] speed up the test cycle --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index c61331885..a1b988b91 100644 --- a/tox.ini +++ b/tox.ini @@ -50,7 +50,7 @@ extras = test setenv = # Define TEST_SUITE in the environment as an aid to constructing the # correct test command below. - !py36: TEST_SUITE = allmydata + !py36: TEST_SUITE = allmydata.test.test_abbreviate py36: TEST_SUITE = allmydata.test.python3_tests commands = From fac12210cb76e5cc3600ed6016ce960eab42095c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 15:58:48 -0500 Subject: [PATCH 017/201] Can't do that Git stuff without a checkout Maybe *that* is why `coveralls --finish` fails? --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2b213493e..243d865e4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -93,6 +93,9 @@ jobs: - "coverage" runs-on: "ubuntu-latest" steps: + - name: "Check out Tahoe-LAFS sources" + uses: "actions/checkout@v2" + - name: "Finish Coveralls Reporting" run: | # coveralls-python does have a `--finish` option but it doesn't seem From 89c54af01db145996d680b8ade82370e3564c7b1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:05:35 -0500 Subject: [PATCH 018/201] Guess we need the rest of the repo too, surprise. 
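The reason the finish step needs a checkout at all (and, at this point, the full history) is the `git rev-parse` feeding the hand-rolled webhook call introduced above. Pulled out of the YAML, the request this series is converging on looks roughly like the following sketch; the repo token comes from the environment and the PR number is a placeholder.

    # Tell Coveralls the parallel build identified by build_num is complete.
    REV=$(git rev-parse HEAD)     # merge commit for a PR, branch tip otherwise
    BUILD_NUM=$REV-PR-$PR
    curl -k \
      "https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN" \
      -d "payload[build_num]=$BUILD_NUM&payload[status]=done&payload[repo_name]=$GITHUB_REPOSITORY"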
--- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 243d865e4..473cc7d53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -96,6 +96,10 @@ jobs: - name: "Check out Tahoe-LAFS sources" uses: "actions/checkout@v2" + - name: "Fetch all history for all tags and branches" + run: | + git fetch --prune --unshallow + - name: "Finish Coveralls Reporting" run: | # coveralls-python does have a `--finish` option but it doesn't seem From d515887ba1b1b0a6af9efb575883ba9e46f12201 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:15:02 -0500 Subject: [PATCH 019/201] This is probably faster and may actually work `git fetch --prune --unshallow` doesn't seem to get refs/remotes/pull//merge but that's okay because HEAD is already set to that --- .github/workflows/ci.yml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 473cc7d53..633dd85e3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -96,10 +96,6 @@ jobs: - name: "Check out Tahoe-LAFS sources" uses: "actions/checkout@v2" - - name: "Fetch all history for all tags and branches" - run: | - git fetch --prune --unshallow - - name: "Finish Coveralls Reporting" run: | # coveralls-python does have a `--finish` option but it doesn't seem @@ -175,8 +171,11 @@ jobs: # # GITHUB_REF is a string like the rev being parsed above. We # extract the PR number from it. + # + # actions/checkout@v2 makes HEAD the same as refs/remotes/pull//merge so we can just rev-parse that. PR=$(echo $GITHUB_REF | cut -d / -f 4) - REV=$(git rev-parse $GITHUB_REF) + REV=$(git rev-parse HEAD) BUILD_NUM=$REV-PR-$PR REPO_NAME=$GITHUB_REPOSITORY curl \ From 59e385c00f5a85247959e922f20f20438163798f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:24:53 -0500 Subject: [PATCH 020/201] apparently it doesn't have `remotes` in there --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 633dd85e3..7b46acbf3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -165,16 +165,16 @@ jobs: # used. However, observation of the coveralls.io web interface # suggests the value instead is something more like: # - # $(git rev-parse refs/remotes/pull//merge)-PR- + # $(git rev-parse refs/pull//merge)-PR- # # Thus, we select the same. # # GITHUB_REF is a string like the rev being parsed above. We # extract the PR number from it. # - # actions/checkout@v2 makes HEAD the same as refs/remotes/pull//merge so we can just rev-parse that. - PR=$(echo $GITHUB_REF | cut -d / -f 4) + PR=$(echo $GITHUB_REF | cut -d / -f 3) REV=$(git rev-parse HEAD) BUILD_NUM=$REV-PR-$PR REPO_NAME=$GITHUB_REPOSITORY From e3a6f43dc9cf51b9c917d8c585735806b839bc73 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:43:11 -0500 Subject: [PATCH 021/201] less shell wankery --- .github/workflows/ci.yml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7b46acbf3..0451c3bd1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -168,16 +168,19 @@ jobs: # $(git rev-parse refs/pull//merge)-PR- # # Thus, we select the same. - # - # GITHUB_REF is a string like the rev being parsed above. We - # extract the PR number from it. 
- # - # actions/checkout@v2 makes HEAD the same as refs/pull//merge so we can just rev-parse that. - PR=$(echo $GITHUB_REF | cut -d / -f 3) + + # refs/pull//merge was justed checked out by so we can just + # rev-parse HEAD to find the revision. REV=$(git rev-parse HEAD) + + # We can get the PR number from the "context". + # https://github.community/t/github-ref-is-inconsistent/17728/3 + # https://docs.github.com/en/free-pro-team@latest/articles/events-that-trigger-workflows + PR=${{ github.event.number }} + BUILD_NUM=$REV-PR-$PR REPO_NAME=$GITHUB_REPOSITORY + curl \ -k \ https://coveralls.io/webhook?repo_token=$COVERALLS_REPO_TOKEN \ From 3855fe0a6d983123dc085eb7d705ca25ce9b8c88 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 16:43:36 -0500 Subject: [PATCH 022/201] reinstate full test suite --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index a1b988b91..c61331885 100644 --- a/tox.ini +++ b/tox.ini @@ -50,7 +50,7 @@ extras = test setenv = # Define TEST_SUITE in the environment as an aid to constructing the # correct test command below. - !py36: TEST_SUITE = allmydata.test.test_abbreviate + !py36: TEST_SUITE = allmydata py36: TEST_SUITE = allmydata.test.python3_tests commands = From 35614d13828427ad65f3ae9686241f75e7bbb423 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 17:46:40 -0500 Subject: [PATCH 023/201] Remove the codecov badge --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 98150ed27..f12a41945 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ Free and Open decentralized data store `Tahoe-LAFS `__ (Tahoe Least-Authority File Store) is the first free software / open-source storage technology that distributes your data across multiple servers. Even if some servers fail or are taken over by an attacker, the entire file store continues to function correctly, preserving your privacy and security. -|Contributor Covenant| |readthedocs| |travis| |circleci| |codecov| +|Contributor Covenant| |readthedocs| |travis| |circleci| Table of contents From e9a7838d882a96128ec64ea7b93eecce4aa93b54 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 17:46:50 -0500 Subject: [PATCH 024/201] Remove the codecov anchor --- README.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/README.rst b/README.rst index f12a41945..ef094456c 100644 --- a/README.rst +++ b/README.rst @@ -125,9 +125,6 @@ See `TGPPL.PDF `__ for why the TGPPL ex .. |circleci| image:: https://circleci.com/gh/tahoe-lafs/tahoe-lafs.svg?style=svg :target: https://circleci.com/gh/tahoe-lafs/tahoe-lafs -.. |codecov| image:: https://codecov.io/github/tahoe-lafs/tahoe-lafs/coverage.svg?branch=master - :alt: test coverage percentage - :target: https://codecov.io/github/tahoe-lafs/tahoe-lafs?branch=master .. 
|Contributor Covenant| image:: https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg :alt: code of conduct From b06730b9afb6a67465ace0515ac3c4d5bceb7864 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 17:46:58 -0500 Subject: [PATCH 025/201] Add the coveralls.io badge --- README.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index ef094456c..b1f6d2563 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ Free and Open decentralized data store `Tahoe-LAFS `__ (Tahoe Least-Authority File Store) is the first free software / open-source storage technology that distributes your data across multiple servers. Even if some servers fail or are taken over by an attacker, the entire file store continues to function correctly, preserving your privacy and security. -|Contributor Covenant| |readthedocs| |travis| |circleci| +|Contributor Covenant| |readthedocs| |travis| |circleci| |coveralls| Table of contents @@ -125,6 +125,9 @@ See `TGPPL.PDF `__ for why the TGPPL ex .. |circleci| image:: https://circleci.com/gh/tahoe-lafs/tahoe-lafs.svg?style=svg :target: https://circleci.com/gh/tahoe-lafs/tahoe-lafs +.. |coveralls| image:: https://coveralls.io/repos/github/tahoe-lafs/tahoe-lafs/badge.svg + :alt: code coverage + :target: https://coveralls.io/github/tahoe-lafs/tahoe-lafs .. |Contributor Covenant| image:: https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg :alt: code of conduct From f5ba293f79de8672ca1e625da1ee942b79a2ef0b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 20:27:17 -0500 Subject: [PATCH 026/201] Ideally this is no longer necessary --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0451c3bd1..6d21cd247 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: - name: "Report Coverage to Coveralls" run: | pip install coveralls - python -m coveralls --verbose + python -m coveralls env: # Some magic value required for some magic reason. GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" From e382ef8a893b1984d3f1fc1033f36b92fb55da4a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 20:27:41 -0500 Subject: [PATCH 027/201] Clean up the explanation, link to some more/better stuff --- .github/workflows/ci.yml | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d21cd247..2aaf2f6c0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -162,20 +162,36 @@ jobs: # However, we have the further constraint that our build_num must # agree with whatever coveralls-python has selected. An inspection # of the coveralls-python source suggests that GITHUB_RUN_ID is - # used. However, observation of the coveralls.io web interface - # suggests the value instead is something more like: + # used: # - # $(git rev-parse refs/pull//merge)-PR- + # * https://github.com/coveralls-clients/coveralls-python/blob/a9b36299ce9ba3bb6858700781881029d82e545d/coveralls/api.py#L105-L109 + # * https://github.com/coveralls-clients/coveralls-python/blob/a9b36299ce9ba3bb6858700781881029d82e545d/coveralls/api.py#L54-L59 # - # Thus, we select the same. 
+ # However, observation of the coveralls.io web interface, logs from + # the coveralls command in action, and experimentation suggests the + # value instead is something more like: + # + # -PR- + # + # For PRs it is the merge commit (`refs/pull//merge`). For + # branches, the tip. - # refs/pull//merge was justed checked out by so we can just - # rev-parse HEAD to find the revision. + # For pull requests, refs/pull//merge was just checked out + # by so HEAD will refer to the right revision. For branches, HEAD + # is also the tip of the branch. REV=$(git rev-parse HEAD) # We can get the PR number from the "context". - # https://github.community/t/github-ref-is-inconsistent/17728/3 - # https://docs.github.com/en/free-pro-team@latest/articles/events-that-trigger-workflows + # + # https://docs.github.com/en/free-pro-team@latest/developers/webhooks-and-events/webhook-events-and-payloads#pull_request + # + # (via ). + # + # If this is a pull request, `github.event` is a `pull_request` + # structure which has `number` right in it. + # + # If this is a push, `github.event` is a `push` instead and ... XXX ??? + PR=${{ github.event.number }} BUILD_NUM=$REV-PR-$PR From 3b8df95e3e19d4b3aaa30799ff6e630fb10fae7f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Wed, 6 Jan 2021 20:52:12 -0500 Subject: [PATCH 028/201] Try constructing build_num differently for push --- .github/workflows/ci.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2aaf2f6c0..701b6815c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -190,11 +190,16 @@ jobs: # If this is a pull request, `github.event` is a `pull_request` # structure which has `number` right in it. # - # If this is a push, `github.event` is a `push` instead and ... XXX ??? + # If this is a push, `github.event` is a `push` instead but we only + # need the revision to construct the build_num. PR=${{ github.event.number }} - BUILD_NUM=$REV-PR-$PR + if [ "${PR}" = "" ]; then + BUILD_NUM=$REV + else + BUILD_NUM=$REV-PR-$PR + fi REPO_NAME=$GITHUB_REPOSITORY curl \ From 709823e5629140299d2f1fb191dbdfdbd3583dd7 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Thu, 7 Jan 2021 18:24:57 -0500 Subject: [PATCH 029/201] most of those words proved irrelevant --- .github/workflows/ci.yml | 66 ++++------------------------------------ 1 file changed, 6 insertions(+), 60 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 701b6815c..5c7b21151 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -104,7 +104,7 @@ jobs: # # But all it does is this simple POST so we can just send it # ourselves. The only hard part is guessing what the POST - # parameters mean. + # parameters mean. And I've done that for you already. # # Since the build is done I'm going to guess that "done" is a fine # value for status. @@ -114,67 +114,13 @@ jobs: # is on CircleCI. CircleCI documentation says this about # CIRCLE_WORKFLOW_ID: # - # A unique identifier for the workflow instance of the current - # job. This identifier is the same for every job in a given - # workflow instance. + # Observation of the coveralls.io web interface, logs from the + # coveralls command in action, and experimentation suggests the + # value for PRs is something more like: # - # (from https://circleci.com/docs/2.0/env-vars/) + # -PR- # - # A CircleCI workflow is roughly "the group of jobs run for a - # particular commit". 
There are exceptions to this but maybe we can - # ignore them. - # - # The only over hints we get from Coveralls about "build_num" are: - # - # * An example value of `1234` - # - # * Another example value of `$BUILD_NUMBER` where BUILD_NUMBER is - # not defined anywhere. - # - # Starting from the CircleCI workflow example, then, and looking at - # the environment variables GitHub Actions offers - # (https://docs.github.com/en/free-pro-team@latest/actions/reference/environment-variables#default-environment-variables) there are two of interest: - # - # * GITHUB_RUN_ID - A unique number for each run within a - # repository. This number does not change if you re-run the - # workflow run. - # - # * GITHUB_RUN_NUMBER - A unique number for each run of a particular - # workflow in a repository. This number begins at 1 for the - # workflow's first run, and increments with each new run. This - # number does not change if you re-run the workflow run. - # - # These seem to offer approximately the same value and only differ - # on whether they will be unique for the project as a whole or just - # for this particular workflow ("ci", as defined by this file). - # - # Unfortunately neither of them changes if a workflow is re-run. - # This differs from the behavior of CircleCI's CIRCLE_WORKFLOW_ID - # where a new value is assigned if a workflow is re-run. - # - # The consequence of this would seem to be that multiple runs of a - # GitHub Actions workflow will have their coverage reported to the - # same job. And since we eventually "finish" a job, later runs - # would be discarded (I suppose). - # - # There doesn't seem to be a way to do any better, though. - # - # However, we have the further constraint that our build_num must - # agree with whatever coveralls-python has selected. An inspection - # of the coveralls-python source suggests that GITHUB_RUN_ID is - # used: - # - # * https://github.com/coveralls-clients/coveralls-python/blob/a9b36299ce9ba3bb6858700781881029d82e545d/coveralls/api.py#L105-L109 - # * https://github.com/coveralls-clients/coveralls-python/blob/a9b36299ce9ba3bb6858700781881029d82e545d/coveralls/api.py#L54-L59 - # - # However, observation of the coveralls.io web interface, logs from - # the coveralls command in action, and experimentation suggests the - # value instead is something more like: - # - # -PR- - # - # For PRs it is the merge commit (`refs/pull//merge`). For - # branches, the tip. + # For branches, it's just the git branch tip hash. # For pull requests, refs/pull//merge was just checked out # by so HEAD will refer to the right revision. For branches, HEAD From 5614c4c3f4a80699dc3700f72068b5d3a6464ed3 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 8 Jan 2021 08:27:00 -0500 Subject: [PATCH 030/201] improve this comment marginally just looking for an excuse to trigger another build and see if inviting "coveralls" to be a collaborator on LeastAuthority/tahoe-lafs fixes the status reporting issue. --- .github/workflows/ci.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5c7b21151..f690d11ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -76,7 +76,9 @@ jobs: # Mark the data as just one piece of many because we have more than # one instance of this job (Windows, macOS) which collects and # reports coverage. This is necessary to cause Coveralls to merge - # multiple coverage results into a single report. + # multiple coverage results into a single report. 
Note the merge + # only happens when we "finish" a particular build, as identified by + # its "build_num" (aka "service_number"). COVERALLS_PARALLEL: true # Tell Coveralls that we're done reporting coverage data. Since we're using From e72c93a9826f7950cedfcdffaf2c631cd7b0a7b6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Fri, 8 Jan 2021 08:52:38 -0500 Subject: [PATCH 031/201] explain why we're not using the github action here --- .github/workflows/ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f690d11ea..4f63ed19a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,7 +59,10 @@ jobs: name: eliot.log path: eliot.log - # Upload this job's coverage data to Coveralls. + # Upload this job's coverage data to Coveralls. While there is a GitHub + # Action for this, as of Jan 2021 it does not support Python coverage + # files - only lcov files. Therefore, we use coveralls-python, the + # coveralls.io-supplied Python reporter, for this. - name: "Report Coverage to Coveralls" run: | pip install coveralls From 891ef3eefdf330a2f7fecffc6ac9aba23385a249 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sat, 9 Jan 2021 18:19:04 -0500 Subject: [PATCH 032/201] news fragment --- newsfragments/3588.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3588.minor diff --git a/newsfragments/3588.minor b/newsfragments/3588.minor new file mode 100644 index 000000000..e69de29bb From d78e72595a044a426f07d5b06b4410b89a40b3c9 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sat, 9 Jan 2021 18:19:09 -0500 Subject: [PATCH 033/201] Use SetErrorMode and related constants from pywin32 --- src/allmydata/windows/fixups.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index e7f045b95..a7552b377 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -11,19 +11,19 @@ def initialize(): import codecs, re from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error - from ctypes.wintypes import BOOL, HANDLE, DWORD, UINT, LPWSTR, LPCWSTR, LPVOID + from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID from allmydata.util import log from allmydata.util.encodingutil import canonical_encoding # - SetErrorMode = WINFUNCTYPE( - UINT, UINT, - use_last_error=True - )(("SetErrorMode", windll.kernel32)) - - SEM_FAILCRITICALERRORS = 0x0001 - SEM_NOOPENFILEERRORBOX = 0x8000 + from win32api import ( + SetErrorMode, + ) + from win32con import ( + SEM_FAILCRITICALERRORS, + SEM_NOOPENFILEERRORBOX, + ) SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX) From e80bd6894ff823d17d22adb77cc08e05df036913 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 10:39:17 -0500 Subject: [PATCH 034/201] Take a first attempt at testing the argv logic directly --- src/allmydata/test/test_windows.py | 117 +++++++++++++++++++++++++++++ src/allmydata/windows/fixups.py | 63 ++++++++++------ 2 files changed, 159 insertions(+), 21 deletions(-) create mode 100644 src/allmydata/test/test_windows.py diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py new file mode 100644 index 000000000..0eb4de568 --- /dev/null +++ b/src/allmydata/test/test_windows.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +# Tahoe-LAFS -- secure, distributed storage grid +# +# Copyright © 
2020 The Tahoe-LAFS Software Foundation +# +# This file is part of Tahoe-LAFS. +# +# See the docs/about.rst file for licensing information. + +""" +Tests for the ``allmydata.windows``. +""" + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +from sys import ( + executable, +) +from json import ( + load, +) + +from twisted.python.filepath import ( + FilePath, +) +from twisted.python.runtime import ( + platform, +) + +from testtools import ( + skipUnless, +) + +from testtools.matchers import ( + MatchesAll, + AllMatch, + IsInstance, + Equals, +) + +from hypothesis import ( + given, +) + +from hypothesis.strategies import ( + lists, + text, +) + +from subprocess import ( + check_call, +) + +from .common import ( + SyncTestCase, +) + +from ..windows.fixups import ( + get_argv, +) + +@skipUnless(platform.isWindows()) +class GetArgvTests(SyncTestCase): + """ + Tests for ``get_argv``. + """ + def test_get_argv_return_type(self): + """ + ``get_argv`` returns a list of unicode strings + """ + # We don't know what this process's command line was so we just make + # structural assertions here. + argv = get_argv() + self.assertThat( + argv, + MatchesAll([ + IsInstance(list), + AllMatch(IsInstance(str)), + ]), + ) + + @given(lists(text(max_size=4), max_size=4)) + def test_argv_values(self, argv): + """ + ``get_argv`` returns a list representing the result of tokenizing the + "command line" argument string provided to Windows processes. + """ + save_argv = FilePath(self.mktemp()) + saved_argv_path = FilePath(self.mktemp()) + with open(save_argv.path, "wt") as f: + f.write( + """ + import sys + import json + with open({!r}, "wt") as f: + f.write(json.dumps(sys.argv)) + """.format(saved_argv_path.path), + ) + check_call([ + executable, + save_argv, + ] + argv) + + with open(saved_argv_path, "rt") as f: + saved_argv = load(f) + + self.assertThat( + argv, + Equals(saved_argv), + ) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index a7552b377..2cdb1ad93 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -2,6 +2,43 @@ from __future__ import print_function done = False +def get_argv(): + """ + :return [unicode]: The argument list this process was invoked with, as + unicode. + + Python 2 does not do a good job exposing this information in + ``sys.argv`` on Windows so this code re-retrieves the underlying + information using Windows API calls and massages it into the right + shape. 
+ """ + # + from win32ui import ( + GetCommandLine, + ) + + from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error + from ctypes.wintypes import LPWSTR, LPCWSTR + + # + CommandLineToArgvW = WINFUNCTYPE( + POINTER(LPWSTR), LPCWSTR, POINTER(c_int), + use_last_error=True + )(("CommandLineToArgvW", windll.shell32)) + + argc = c_int(0) + argv_unicode = CommandLineToArgvW(GetCommandLine(), byref(argc)) + if argv_unicode is None: + raise WinError(get_last_error()) + + # Convert it to a normal Python list + return list( + argv_unicode[i] + for i + in range(argc.value) + ) + + def initialize(): global done import sys @@ -10,8 +47,8 @@ def initialize(): done = True import codecs, re - from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error - from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID + from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, get_last_error + from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPVOID from allmydata.util import log from allmydata.util.encodingutil import canonical_encoding @@ -195,23 +232,6 @@ def initialize(): # This works around . - # - GetCommandLineW = WINFUNCTYPE( - LPWSTR, - use_last_error=True - )(("GetCommandLineW", windll.kernel32)) - - # - CommandLineToArgvW = WINFUNCTYPE( - POINTER(LPWSTR), LPCWSTR, POINTER(c_int), - use_last_error=True - )(("CommandLineToArgvW", windll.shell32)) - - argc = c_int(0) - argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc)) - if argv_unicode is None: - raise WinError(get_last_error()) - # Because of (and similar limitations in # twisted), the 'bin/tahoe' script cannot invoke us with the actual Unicode arguments. # Instead it "mangles" or escapes them using \x7F as an escape character, which we @@ -219,11 +239,12 @@ def initialize(): def unmangle(s): return re.sub(u'\\x7F[0-9a-fA-F]*\\;', lambda m: unichr(int(m.group(0)[1:-1], 16)), s) + argv_unicode = get_argv() try: - argv = [unmangle(argv_unicode[i]).encode('utf-8') for i in xrange(0, argc.value)] + argv = [unmangle(argv_u).encode('utf-8') for argv_u in argv_unicode] except Exception as e: _complain("%s: could not unmangle Unicode arguments.\n%r" - % (sys.argv[0], [argv_unicode[i] for i in xrange(0, argc.value)])) + % (sys.argv[0], argv_unicode)) raise # Take only the suffix with the same number of arguments as sys.argv. From 24f3d74fdf55f4ae4e413e739bd1f63303520910 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 10:48:40 -0500 Subject: [PATCH 035/201] Fix the skip --- src/allmydata/test/test_windows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 0eb4de568..c1c61696c 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -66,7 +66,7 @@ from ..windows.fixups import ( get_argv, ) -@skipUnless(platform.isWindows()) +@skipUnless(platform.isWindows(), "get_argv is Windows-only") class GetArgvTests(SyncTestCase): """ Tests for ``get_argv``. From 6b621efef27bf73ef763bc49c7e91ebc5e82cb73 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 10:48:49 -0500 Subject: [PATCH 036/201] Turns out there is also CommandLineToArgv just not CommandLineToArgvW, but that's fine. 
--- src/allmydata/windows/fixups.py | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 2cdb1ad93..9fb81bdff 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -13,30 +13,12 @@ def get_argv(): shape. """ # - from win32ui import ( + from win32api import ( GetCommandLine, + CommandLineToArgv, ) + return CommandLineToArgv(GetCommandLine()) - from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error - from ctypes.wintypes import LPWSTR, LPCWSTR - - # - CommandLineToArgvW = WINFUNCTYPE( - POINTER(LPWSTR), LPCWSTR, POINTER(c_int), - use_last_error=True - )(("CommandLineToArgvW", windll.shell32)) - - argc = c_int(0) - argv_unicode = CommandLineToArgvW(GetCommandLine(), byref(argc)) - if argv_unicode is None: - raise WinError(get_last_error()) - - # Convert it to a normal Python list - return list( - argv_unicode[i] - for i - in range(argc.value) - ) def initialize(): From b3a6f25c1c486f02e22373b1cc53df57fc2d5c2b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 11:01:30 -0500 Subject: [PATCH 037/201] Python 2 gets an old version with no CommandLineToArgv Thanks. --- src/allmydata/windows/fixups.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 9fb81bdff..2cdb1ad93 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -13,12 +13,30 @@ def get_argv(): shape. """ # - from win32api import ( + from win32ui import ( GetCommandLine, - CommandLineToArgv, ) - return CommandLineToArgv(GetCommandLine()) + from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error + from ctypes.wintypes import LPWSTR, LPCWSTR + + # + CommandLineToArgvW = WINFUNCTYPE( + POINTER(LPWSTR), LPCWSTR, POINTER(c_int), + use_last_error=True + )(("CommandLineToArgvW", windll.shell32)) + + argc = c_int(0) + argv_unicode = CommandLineToArgvW(GetCommandLine(), byref(argc)) + if argv_unicode is None: + raise WinError(get_last_error()) + + # Convert it to a normal Python list + return list( + argv_unicode[i] + for i + in range(argc.value) + ) def initialize(): From a73668a056832c588345be4a9ab832130b5dbf5f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 11:21:13 -0500 Subject: [PATCH 038/201] this doesn't take a list --- src/allmydata/test/test_windows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index c1c61696c..21932ac48 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -80,10 +80,10 @@ class GetArgvTests(SyncTestCase): argv = get_argv() self.assertThat( argv, - MatchesAll([ + MatchesAll( IsInstance(list), AllMatch(IsInstance(str)), - ]), + ), ) @given(lists(text(max_size=4), max_size=4)) From b02b930eed079d58ef949ecd758db874652ab859 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 11:22:00 -0500 Subject: [PATCH 039/201] do better with paths --- src/allmydata/test/test_windows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 21932ac48..cd57df690 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -105,10 +105,10 @@ class 
GetArgvTests(SyncTestCase): ) check_call([ executable, - save_argv, + save_argv.path, ] + argv) - with open(saved_argv_path, "rt") as f: + with open(saved_argv_path.path, "rt") as f: saved_argv = load(f) self.assertThat( From 6091ca2164c299641adcdb86971cef310dcf8957 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 11:23:19 -0500 Subject: [PATCH 040/201] try to get the child source right --- src/allmydata/test/test_windows.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index cd57df690..e13fa9b16 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -26,7 +26,9 @@ from sys import ( from json import ( load, ) - +from textwrap import ( + dedent, +) from twisted.python.filepath import ( FilePath, ) @@ -95,13 +97,13 @@ class GetArgvTests(SyncTestCase): save_argv = FilePath(self.mktemp()) saved_argv_path = FilePath(self.mktemp()) with open(save_argv.path, "wt") as f: - f.write( + f.write(dedent( """ import sys import json with open({!r}, "wt") as f: f.write(json.dumps(sys.argv)) - """.format(saved_argv_path.path), + """.format(saved_argv_path.path)), ) check_call([ executable, From e64a4c64269263b16bfc7048c8f9b6a19d5c30a9 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 19:59:22 -0500 Subject: [PATCH 041/201] Attempt to use a parent-side API that supports unicode properly --- src/allmydata/test/_win_subprocess.py | 159 ++++++++++++++++++++++++++ src/allmydata/test/test_windows.py | 23 ++-- 2 files changed, 173 insertions(+), 9 deletions(-) create mode 100644 src/allmydata/test/_win_subprocess.py diff --git a/src/allmydata/test/_win_subprocess.py b/src/allmydata/test/_win_subprocess.py new file mode 100644 index 000000000..cc66f7552 --- /dev/null +++ b/src/allmydata/test/_win_subprocess.py @@ -0,0 +1,159 @@ +## issue: https://bugs.python.org/issue19264 + +import os +import ctypes +import subprocess +import _subprocess +from ctypes import byref, windll, c_char_p, c_wchar_p, c_void_p, \ + Structure, sizeof, c_wchar, WinError +from ctypes.wintypes import BYTE, WORD, LPWSTR, BOOL, DWORD, LPVOID, \ + HANDLE + + +## +## Types +## + +CREATE_UNICODE_ENVIRONMENT = 0x00000400 +LPCTSTR = c_char_p +LPTSTR = c_wchar_p +LPSECURITY_ATTRIBUTES = c_void_p +LPBYTE = ctypes.POINTER(BYTE) + +class STARTUPINFOW(Structure): + _fields_ = [ + ("cb", DWORD), ("lpReserved", LPWSTR), + ("lpDesktop", LPWSTR), ("lpTitle", LPWSTR), + ("dwX", DWORD), ("dwY", DWORD), + ("dwXSize", DWORD), ("dwYSize", DWORD), + ("dwXCountChars", DWORD), ("dwYCountChars", DWORD), + ("dwFillAtrribute", DWORD), ("dwFlags", DWORD), + ("wShowWindow", WORD), ("cbReserved2", WORD), + ("lpReserved2", LPBYTE), ("hStdInput", HANDLE), + ("hStdOutput", HANDLE), ("hStdError", HANDLE), + ] + +LPSTARTUPINFOW = ctypes.POINTER(STARTUPINFOW) + + +class PROCESS_INFORMATION(Structure): + _fields_ = [ + ("hProcess", HANDLE), ("hThread", HANDLE), + ("dwProcessId", DWORD), ("dwThreadId", DWORD), + ] + +LPPROCESS_INFORMATION = ctypes.POINTER(PROCESS_INFORMATION) + + +class DUMMY_HANDLE(ctypes.c_void_p): + + def __init__(self, *a, **kw): + super(DUMMY_HANDLE, self).__init__(*a, **kw) + self.closed = False + + def Close(self): + if not self.closed: + windll.kernel32.CloseHandle(self) + self.closed = True + + def __int__(self): + return self.value + + +CreateProcessW = windll.kernel32.CreateProcessW +CreateProcessW.argtypes = [ + LPCTSTR, LPTSTR, LPSECURITY_ATTRIBUTES, + 
LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, LPCTSTR, + LPSTARTUPINFOW, LPPROCESS_INFORMATION, +] +CreateProcessW.restype = BOOL + + +## +## Patched functions/classes +## + +def CreateProcess(executable, args, _p_attr, _t_attr, + inherit_handles, creation_flags, env, cwd, + startup_info): + """Create a process supporting unicode executable and args for win32 + + Python implementation of CreateProcess using CreateProcessW for Win32 + + """ + + si = STARTUPINFOW( + dwFlags=startup_info.dwFlags, + wShowWindow=startup_info.wShowWindow, + cb=sizeof(STARTUPINFOW), + ## XXXvlab: not sure of the casting here to ints. + hStdInput=int(startup_info.hStdInput), + hStdOutput=int(startup_info.hStdOutput), + hStdError=int(startup_info.hStdError), + ) + + wenv = None + if env is not None: + ## LPCWSTR seems to be c_wchar_p, so let's say CWSTR is c_wchar + env = (unicode("").join([ + unicode("%s=%s\0") % (k, v) + for k, v in env.items()])) + unicode("\0") + wenv = (c_wchar * len(env))() + wenv.value = env + + pi = PROCESS_INFORMATION() + creation_flags |= CREATE_UNICODE_ENVIRONMENT + + if CreateProcessW(executable, args, None, None, + inherit_handles, creation_flags, + wenv, cwd, byref(si), byref(pi)): + return (DUMMY_HANDLE(pi.hProcess), DUMMY_HANDLE(pi.hThread), + pi.dwProcessId, pi.dwThreadId) + raise WinError() + + +class Popen(subprocess.Popen): + """This superseeds Popen and corrects a bug in cPython 2.7 implem""" + + def _execute_child(self, args, executable, preexec_fn, close_fds, + cwd, env, universal_newlines, + startupinfo, creationflags, shell, to_close, + p2cread, p2cwrite, + c2pread, c2pwrite, + errread, errwrite): + """Code from part of _execute_child from Python 2.7 (9fbb65e) + + There are only 2 little changes concerning the construction of + the the final string in shell mode: we preempt the creation of + the command string when shell is True, because original function + will try to encode unicode args which we want to avoid to be able to + sending it as-is to ``CreateProcess``. 
+ + """ + if not isinstance(args, subprocess.types.StringTypes): + args = subprocess.list2cmdline(args) + + if startupinfo is None: + startupinfo = subprocess.STARTUPINFO() + if shell: + startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW + startupinfo.wShowWindow = _subprocess.SW_HIDE + comspec = os.environ.get("COMSPEC", unicode("cmd.exe")) + args = unicode('{} /c "{}"').format(comspec, args) + if (_subprocess.GetVersion() >= 0x80000000 or + os.path.basename(comspec).lower() == "command.com"): + w9xpopen = self._find_w9xpopen() + args = unicode('"%s" %s') % (w9xpopen, args) + creationflags |= _subprocess.CREATE_NEW_CONSOLE + + cp = _subprocess.CreateProcess + _subprocess.CreateProcess = CreateProcess + try: + super(Popen, self)._execute_child( + args, executable, + preexec_fn, close_fds, cwd, env, universal_newlines, + startupinfo, creationflags, False, to_close, p2cread, + p2cwrite, c2pread, c2pwrite, errread, errwrite, + ) + finally: + _subprocess.CreateProcess = cp diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index e13fa9b16..bb9aa96bf 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -56,10 +56,6 @@ from hypothesis.strategies import ( text, ) -from subprocess import ( - check_call, -) - from .common import ( SyncTestCase, ) @@ -97,6 +93,10 @@ class GetArgvTests(SyncTestCase): save_argv = FilePath(self.mktemp()) saved_argv_path = FilePath(self.mktemp()) with open(save_argv.path, "wt") as f: + # A simple program to save argv to a file. Using the file saves + # us having to figure out how to reliably get non-ASCII back over + # stdio which may pose an independent set of challenges. At least + # file I/O is relatively simple and well-understood. f.write(dedent( """ import sys @@ -105,11 +105,16 @@ class GetArgvTests(SyncTestCase): f.write(json.dumps(sys.argv)) """.format(saved_argv_path.path)), ) - check_call([ - executable, - save_argv.path, - ] + argv) - + # Python 2.7 doesn't have good options for launching a process with + # non-ASCII in its command line. + from ._win_subprocess import ( + Popen + ) + returncode = Popen([executable, save_argv] + argv).wait() + self.assertThat( + 0, + Equals(returncode), + ) with open(saved_argv_path.path, "rt") as f: saved_argv = load(f) From a21b66e775d4767fbac3272f48ba243e2e13ecdc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 20:06:07 -0500 Subject: [PATCH 042/201] FilePath again --- src/allmydata/test/test_windows.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index bb9aa96bf..b0231987d 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -90,9 +90,9 @@ class GetArgvTests(SyncTestCase): ``get_argv`` returns a list representing the result of tokenizing the "command line" argument string provided to Windows processes. """ - save_argv = FilePath(self.mktemp()) + save_argv_path = FilePath(self.mktemp()) saved_argv_path = FilePath(self.mktemp()) - with open(save_argv.path, "wt") as f: + with open(save_argv_path.path, "wt") as f: # A simple program to save argv to a file. Using the file saves # us having to figure out how to reliably get non-ASCII back over # stdio which may pose an independent set of challenges. 
At least @@ -110,7 +110,7 @@ class GetArgvTests(SyncTestCase): from ._win_subprocess import ( Popen ) - returncode = Popen([executable, save_argv] + argv).wait() + returncode = Popen([executable, save_argv_path.path] + argv).wait() self.assertThat( 0, Equals(returncode), From 18de71666f212788c20f1ef673b259a014118f18 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 20:10:34 -0500 Subject: [PATCH 043/201] try to work-around bugs in the Popen hotfix --- src/allmydata/test/test_windows.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index b0231987d..ae62af857 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -110,7 +110,10 @@ class GetArgvTests(SyncTestCase): from ._win_subprocess import ( Popen ) - returncode = Popen([executable, save_argv_path.path] + argv).wait() + from subprocess import ( + PIPE, + ) + returncode = Popen([executable, save_argv_path.path] + argv, stdin=PIPE, stdout=PIPE, stderr=PIPE).wait() self.assertThat( 0, Equals(returncode), From 77c9a2c2f55a3c47fc93924c61f039cb5021255a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 20:13:17 -0500 Subject: [PATCH 044/201] make the failures a little nicer --- src/allmydata/test/test_windows.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index ae62af857..fc40db60c 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -84,14 +84,16 @@ class GetArgvTests(SyncTestCase): ), ) - @given(lists(text(max_size=4), max_size=4)) + @given(lists(text(min_size=1, max_size=4), min_size=1, max_size=4)) def test_argv_values(self, argv): """ ``get_argv`` returns a list representing the result of tokenizing the "command line" argument string provided to Windows processes. """ - save_argv_path = FilePath(self.mktemp()) - saved_argv_path = FilePath(self.mktemp()) + working_path = self.mktemp() + working_path.makedirs() + save_argv_path = working_path.child("script.py") + saved_argv_path = working_path.child("data.json") with open(save_argv_path.path, "wt") as f: # A simple program to save argv to a file. Using the file saves # us having to figure out how to reliably get non-ASCII back over From 360b20a98169663be22771d84d623819beabecd6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 20:14:05 -0500 Subject: [PATCH 045/201] FilePath again --- src/allmydata/test/test_windows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index fc40db60c..63bb3c09e 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -90,7 +90,7 @@ class GetArgvTests(SyncTestCase): ``get_argv`` returns a list representing the result of tokenizing the "command line" argument string provided to Windows processes. 
""" - working_path = self.mktemp() + working_path = FilePath(self.mktemp()) working_path.makedirs() save_argv_path = working_path.child("script.py") saved_argv_path = working_path.child("data.json") From 28435d65c16dc3a3ac1c3433f461a48384031b2d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 20:16:25 -0500 Subject: [PATCH 046/201] test the SUT --- src/allmydata/test/test_windows.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 63bb3c09e..c91a2d462 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -101,10 +101,12 @@ class GetArgvTests(SyncTestCase): # file I/O is relatively simple and well-understood. f.write(dedent( """ - import sys + from allmydata.windows.fixups import ( + get_argv, + ) import json with open({!r}, "wt") as f: - f.write(json.dumps(sys.argv)) + f.write(json.dumps(get_argv())) """.format(saved_argv_path.path)), ) # Python 2.7 doesn't have good options for launching a process with From 3bde012ea14f86ff2ec588db997f0dd419b225fe Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 20:18:00 -0500 Subject: [PATCH 047/201] Create a better expectation If we pass all this stuff to Popen we should expect to see it from get_argv() right? --- src/allmydata/test/test_windows.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index c91a2d462..81638f6e4 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -117,7 +117,8 @@ class GetArgvTests(SyncTestCase): from subprocess import ( PIPE, ) - returncode = Popen([executable, save_argv_path.path] + argv, stdin=PIPE, stdout=PIPE, stderr=PIPE).wait() + argv = [executable, save_argv_path.path] + argv + returncode = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE).wait() self.assertThat( 0, Equals(returncode), From b5f0e21ef845cd486d7c1ba767f07612767f1b53 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Sun, 10 Jan 2021 20:19:15 -0500 Subject: [PATCH 048/201] testtools convention - actual value comes first --- src/allmydata/test/test_windows.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 81638f6e4..fddb60389 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -120,13 +120,13 @@ class GetArgvTests(SyncTestCase): argv = [executable, save_argv_path.path] + argv returncode = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE).wait() self.assertThat( - 0, - Equals(returncode), + returncode, + Equals(0), ) with open(saved_argv_path.path, "rt") as f: saved_argv = load(f) self.assertThat( - argv, - Equals(saved_argv), + saved_argv, + Equals(argv), ) From 30c79bf6787132634ddee2f45ef523c40c8b2abb Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 09:51:36 -0500 Subject: [PATCH 049/201] make sure executable is unicode too, if that matters --- src/allmydata/test/test_windows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index fddb60389..1d8173de7 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -117,7 +117,7 @@ class GetArgvTests(SyncTestCase): from subprocess import ( PIPE, ) - argv = [executable, save_argv_path.path] + argv + 
argv = [executable.decode("utf-8"), save_argv_path.path] + argv returncode = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE).wait() self.assertThat( returncode, From 42f1930914c7247bfc38d68fa94e17b3e80ee6ef Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 10:08:15 -0500 Subject: [PATCH 050/201] disambiguate this a bit --- src/allmydata/test/test_windows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 1d8173de7..5aad36c0b 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -128,5 +128,5 @@ class GetArgvTests(SyncTestCase): self.assertThat( saved_argv, - Equals(argv), + Equals([u"expected"] + argv), ) From e2f396445170fa85d499d1b33e7c1d77dd5df16d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 10:09:54 -0500 Subject: [PATCH 051/201] okay this is indeed the expected --- src/allmydata/test/test_windows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 5aad36c0b..1d8173de7 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -128,5 +128,5 @@ class GetArgvTests(SyncTestCase): self.assertThat( saved_argv, - Equals([u"expected"] + argv), + Equals(argv), ) From 389d70a682825327e6cde32d9d401fb118c90e20 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 10:17:02 -0500 Subject: [PATCH 052/201] see if GetCommandLine() value is interesting --- src/allmydata/test/test_windows.py | 11 ++++++++++- src/allmydata/windows/fixups.py | 6 +++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 1d8173de7..41a71150d 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -49,6 +49,7 @@ from testtools.matchers import ( from hypothesis import ( given, + note, ) from hypothesis.strategies import ( @@ -118,7 +119,15 @@ class GetArgvTests(SyncTestCase): PIPE, ) argv = [executable.decode("utf-8"), save_argv_path.path] + argv - returncode = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE).wait() + p = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE) + p.stdin.close() + stdout = p.stdout.read() + stderr = p.stderr.read() + returncode = p.wait() + + note("stdout: {!r}".format(stdout)) + note("stderr: {!r}".format(stderr)) + self.assertThat( returncode, Equals(0), diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 2cdb1ad93..8537e06b5 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -26,8 +26,12 @@ def get_argv(): use_last_error=True )(("CommandLineToArgvW", windll.shell32)) + import sys + + command_line = GetCommandLine() + print("GetCommandLine() -> {!r}".format(command_line), file=sys.stderr) argc = c_int(0) - argv_unicode = CommandLineToArgvW(GetCommandLine(), byref(argc)) + argv_unicode = CommandLineToArgvW(command_line, byref(argc)) if argv_unicode is None: raise WinError(get_last_error()) From 33f84412b4462f16cdb4115d41e575ca2ecd6f7a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 10:19:17 -0500 Subject: [PATCH 053/201] maybe pywin32 GetCommandLine is not really GetCommandLineW --- src/allmydata/windows/fixups.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/allmydata/windows/fixups.py 
b/src/allmydata/windows/fixups.py index 8537e06b5..f91232457 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -12,14 +12,16 @@ def get_argv(): information using Windows API calls and massages it into the right shape. """ - # - from win32ui import ( - GetCommandLine, - ) from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error from ctypes.wintypes import LPWSTR, LPCWSTR + # + GetCommandLineW = WINFUNCTYPE( + LPWSTR, + use_last_error=True + )(("GetCommandLineW", windll.kernel32)) + # CommandLineToArgvW = WINFUNCTYPE( POINTER(LPWSTR), LPCWSTR, POINTER(c_int), @@ -28,8 +30,8 @@ def get_argv(): import sys - command_line = GetCommandLine() - print("GetCommandLine() -> {!r}".format(command_line), file=sys.stderr) + command_line = GetCommandLineW() + print("GetCommandLineW() -> {!r}".format(command_line), file=sys.stderr) argc = c_int(0) argv_unicode = CommandLineToArgvW(command_line, byref(argc)) if argv_unicode is None: From c2e8d94a7389c6f840787e7cabdc125b8501bf58 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 10:24:25 -0500 Subject: [PATCH 054/201] don't fail this test because it is slow --- src/allmydata/test/test_windows.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 41a71150d..1d5c2632d 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -48,6 +48,8 @@ from testtools.matchers import ( ) from hypothesis import ( + HealthCheck, + settings, given, note, ) @@ -85,6 +87,12 @@ class GetArgvTests(SyncTestCase): ), ) + @settings( + # This test runs a child process. This is unavoidably slow and + # variable. Disable the two time-based Hypothesis health checks. + suppress_health_check=[HealthCheck.too_slow], + deadline=None, + ) @given(lists(text(min_size=1, max_size=4), min_size=1, max_size=4)) def test_argv_values(self, argv): """ From 6d499dea53c76de714a70488a776ddd8daf91500 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 10:29:59 -0500 Subject: [PATCH 055/201] exclude nul from the tested argv values --- src/allmydata/test/test_windows.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 1d5c2632d..6e46a7e8f 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -57,6 +57,7 @@ from hypothesis import ( from hypothesis.strategies import ( lists, text, + characters, ) from .common import ( @@ -93,7 +94,23 @@ class GetArgvTests(SyncTestCase): suppress_health_check=[HealthCheck.too_slow], deadline=None, ) - @given(lists(text(min_size=1, max_size=4), min_size=1, max_size=4)) + @given( + lists( + text( + alphabet=characters( + blacklist_categories=('Cs',), + # Windows CommandLine is a null-terminated string, + # analogous to POSIX exec* arguments. So exclude nul from + # our generated arguments. 
+ blacklist_characters=('\x00',), + ), + min_size=1, + max_size=4, + ), + min_size=1, + max_size=4, + ), + ) def test_argv_values(self, argv): """ ``get_argv`` returns a list representing the result of tokenizing the From a0aa3fe2960e64aa8c0717ae277079b473c4c085 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:12:12 -0500 Subject: [PATCH 056/201] try testing UnicodeOutput --- src/allmydata/test/test_windows.py | 56 ++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 6e46a7e8f..94b9a0f7d 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -29,6 +29,10 @@ from json import ( from textwrap import ( dedent, ) +from subprocess import ( + Popen, +) + from twisted.python.filepath import ( FilePath, ) @@ -56,6 +60,7 @@ from hypothesis import ( from hypothesis.strategies import ( lists, + tuples, text, characters, ) @@ -164,3 +169,54 @@ class GetArgvTests(SyncTestCase): saved_argv, Equals(argv), ) + + +class UnicodeOutputTests(SyncTestCase): + """ + Tests for writing unicode to stdout and stderr. + """ + @given(tuples(characters(), characters())) + def test_write_non_ascii(self, stdout_char, stderr_char): + """ + Non-ASCII unicode characters can be written to stdout and stderr with + automatic UTF-8 encoding. + """ + working_path = FilePath(self.mktemp()) + script = working_path.child("script.py") + script.setContent(dedent( + """ + from future.utils import PY2 + if PY2: + from future.builtins chr + + from allmydata.windows.fixups import initialize + initialize() + + # XXX A shortcoming of the monkey-patch approach is that you'd + # better not iport stdout or stderr before you call initialize. + from sys import argv, stdout, stderr + + stdout.write(chr(int(argv[1]))) + stdout.close() + stderr.write(chr(int(argv[2]))) + stderr.close() + """ + )) + p = Popen([ + executable, + script.path, + str(ord(stdout_char)), + str(ord(stderr_char)), + ]) + stdout = p.stdout.read() + stderr = p.stderr.read() + returncode = p.wait() + + self.assertThat( + (stdout, stderr, returncode), + Equals(( + stdout_char, + stderr_char, + 0, + )), + ) From 08d56c87b417c5c7aac7dd0a8dfa83a5fc33eccd Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:13:45 -0500 Subject: [PATCH 057/201] that was silly --- src/allmydata/test/test_windows.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 94b9a0f7d..ef113d9e8 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -60,7 +60,6 @@ from hypothesis import ( from hypothesis.strategies import ( lists, - tuples, text, characters, ) @@ -175,7 +174,7 @@ class UnicodeOutputTests(SyncTestCase): """ Tests for writing unicode to stdout and stderr. 
""" - @given(tuples(characters(), characters())) + @given(characters(), characters()) def test_write_non_ascii(self, stdout_char, stderr_char): """ Non-ASCII unicode characters can be written to stdout and stderr with From 221f1640a586781af7833655ce0add7bc6671419 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:14:32 -0500 Subject: [PATCH 058/201] make the container --- src/allmydata/test/test_windows.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index ef113d9e8..02b05e5d1 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -181,6 +181,7 @@ class UnicodeOutputTests(SyncTestCase): automatic UTF-8 encoding. """ working_path = FilePath(self.mktemp()) + working_path.makedirs() script = working_path.child("script.py") script.setContent(dedent( """ From 504b2f5b1f347673fa21424854083e72cfb79b23 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:15:20 -0500 Subject: [PATCH 059/201] get the syntax right --- src/allmydata/test/test_windows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 02b05e5d1..205311f9a 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -187,7 +187,7 @@ class UnicodeOutputTests(SyncTestCase): """ from future.utils import PY2 if PY2: - from future.builtins chr + from future.builtins import chr from allmydata.windows.fixups import initialize initialize() From 8fa1b6bb1e275f5e72d6fc85984d768acb3e1ddd Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:15:47 -0500 Subject: [PATCH 060/201] make stdout/stderr available --- src/allmydata/test/test_windows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 205311f9a..89b71eb17 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -207,7 +207,7 @@ class UnicodeOutputTests(SyncTestCase): script.path, str(ord(stdout_char)), str(ord(stderr_char)), - ]) + ], stdout=PIPE, stderr=PIPE) stdout = p.stdout.read() stderr = p.stderr.read() returncode = p.wait() From 23d1d7624254735d99da609de22f3944aaff6020 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:16:23 -0500 Subject: [PATCH 061/201] get the name --- src/allmydata/test/test_windows.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 89b71eb17..a5ceb4883 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -30,6 +30,7 @@ from textwrap import ( dedent, ) from subprocess import ( + PIPE, Popen, ) @@ -144,9 +145,6 @@ class GetArgvTests(SyncTestCase): from ._win_subprocess import ( Popen ) - from subprocess import ( - PIPE, - ) argv = [executable.decode("utf-8"), save_argv_path.path] + argv p = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE) p.stdin.close() From f4a1a6fd97656693b7aebb3263763714397f5e2f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:16:49 -0500 Subject: [PATCH 062/201] get rid of this noise --- src/allmydata/windows/fixups.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index f91232457..3d9dc6afb 100644 --- a/src/allmydata/windows/fixups.py +++ 
b/src/allmydata/windows/fixups.py @@ -28,10 +28,7 @@ def get_argv(): use_last_error=True )(("CommandLineToArgvW", windll.shell32)) - import sys - command_line = GetCommandLineW() - print("GetCommandLineW() -> {!r}".format(command_line), file=sys.stderr) argc = c_int(0) argv_unicode = CommandLineToArgvW(command_line, byref(argc)) if argv_unicode is None: From 3adfb2a1089b31b66610264c6b5ad04110383cd6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:17:57 -0500 Subject: [PATCH 063/201] let it be slow --- src/allmydata/test/test_windows.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index a5ceb4883..857d9d26c 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -73,6 +73,11 @@ from ..windows.fixups import ( get_argv, ) +slow_settings = settings( + suppress_health_check=[HealthCheck.too_slow], + deadline=None, +) + @skipUnless(platform.isWindows(), "get_argv is Windows-only") class GetArgvTests(SyncTestCase): """ @@ -93,12 +98,9 @@ class GetArgvTests(SyncTestCase): ), ) - @settings( - # This test runs a child process. This is unavoidably slow and - # variable. Disable the two time-based Hypothesis health checks. - suppress_health_check=[HealthCheck.too_slow], - deadline=None, - ) + # This test runs a child process. This is unavoidably slow and variable. + # Disable the two time-based Hypothesis health checks. + @slow_settings @given( lists( text( @@ -172,6 +174,7 @@ class UnicodeOutputTests(SyncTestCase): """ Tests for writing unicode to stdout and stderr. """ + @slow_settings @given(characters(), characters()) def test_write_non_ascii(self, stdout_char, stderr_char): """ From a4061619dc5740d961f41a823080c97dc2e2f44d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:32:12 -0500 Subject: [PATCH 064/201] shuffle code around a lot --- src/allmydata/windows/fixups.py | 224 +++++++++++++++++--------------- 1 file changed, 117 insertions(+), 107 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 3d9dc6afb..d7c929de6 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -2,6 +2,40 @@ from __future__ import print_function done = False +from ctypes import WINFUNCTYPE, windll, POINTER, c_int, WinError, byref, get_last_error +from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID + +# +from win32api import ( + SetErrorMode, +) +from win32con import ( + SEM_FAILCRITICALERRORS, + SEM_NOOPENFILEERRORBOX, +) + +# +# BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, +# LPDWORD lpCharsWritten, LPVOID lpReserved); + +WriteConsoleW = WINFUNCTYPE( + BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID, + use_last_error=True +)(("WriteConsoleW", windll.kernel32)) + +# +GetCommandLineW = WINFUNCTYPE( + LPWSTR, + use_last_error=True +)(("GetCommandLineW", windll.kernel32)) + +# +CommandLineToArgvW = WINFUNCTYPE( + POINTER(LPWSTR), LPCWSTR, POINTER(c_int), + use_last_error=True +)(("CommandLineToArgvW", windll.shell32)) + + def get_argv(): """ :return [unicode]: The argument list this process was invoked with, as @@ -12,22 +46,6 @@ def get_argv(): information using Windows API calls and massages it into the right shape. 
""" - - from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error - from ctypes.wintypes import LPWSTR, LPCWSTR - - # - GetCommandLineW = WINFUNCTYPE( - LPWSTR, - use_last_error=True - )(("GetCommandLineW", windll.kernel32)) - - # - CommandLineToArgvW = WINFUNCTYPE( - POINTER(LPWSTR), LPCWSTR, POINTER(c_int), - use_last_error=True - )(("CommandLineToArgvW", windll.shell32)) - command_line = GetCommandLineW() argc = c_int(0) argv_unicode = CommandLineToArgvW(command_line, byref(argc)) @@ -50,20 +68,9 @@ def initialize(): done = True import codecs, re - from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, get_last_error - from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPVOID + from functools import partial from allmydata.util import log - from allmydata.util.encodingutil import canonical_encoding - - # - from win32api import ( - SetErrorMode, - ) - from win32con import ( - SEM_FAILCRITICALERRORS, - SEM_NOOPENFILEERRORBOX, - ) SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX) @@ -73,10 +80,12 @@ def initialize(): # which makes for frustrating debugging if stderr is directed to our wrapper. # So be paranoid about catching errors and reporting them to original_stderr, # so that we can at least see them. - def _complain(message): - print(isinstance(message, str) and message or repr(message), file=original_stderr) + def _complain(output_file, message): + print(isinstance(message, str) and message or repr(message), file=output_file) log.msg(message, level=log.WEIRD) + _complain = partial(_complain, original_stderr) + # Work around . codecs.register(lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None) @@ -137,6 +146,9 @@ def initialize(): real_stdout = (old_stdout_fileno == STDOUT_FILENO) real_stderr = (old_stderr_fileno == STDERR_FILENO) + print("real stdout: {}".format(real_stdout)) + print("real stderr: {}".format(real_stderr)) + if real_stdout: hStdout = GetStdHandle(STD_OUTPUT_HANDLE) if not_a_console(hStdout): @@ -148,88 +160,15 @@ def initialize(): real_stderr = False if real_stdout or real_stderr: - # - # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, - # LPDWORD lpCharsWritten, LPVOID lpReserved); - - WriteConsoleW = WINFUNCTYPE( - BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID, - use_last_error=True - )(("WriteConsoleW", windll.kernel32)) - - class UnicodeOutput(object): - def __init__(self, hConsole, stream, fileno, name): - self._hConsole = hConsole - self._stream = stream - self._fileno = fileno - self.closed = False - self.softspace = False - self.mode = 'w' - self.encoding = 'utf-8' - self.name = name - if hasattr(stream, 'encoding') and canonical_encoding(stream.encoding) != 'utf-8': - log.msg("%s: %r had encoding %r, but we're going to write UTF-8 to it" % - (name, stream, stream.encoding), level=log.CURIOUS) - self.flush() - - def isatty(self): - return False - def close(self): - # don't really close the handle, that would only cause problems - self.closed = True - def fileno(self): - return self._fileno - def flush(self): - if self._hConsole is None: - try: - self._stream.flush() - except Exception as e: - _complain("%s.flush: %r from %r" % (self.name, e, self._stream)) - raise - - def write(self, text): - try: - if self._hConsole is None: - if isinstance(text, unicode): - text = text.encode('utf-8') - self._stream.write(text) - else: - if not isinstance(text, unicode): - text = str(text).decode('utf-8') - remaining = len(text) - while remaining > 0: - n = DWORD(0) - # 
There is a shorter-than-documented limitation on the length of the string - # passed to WriteConsoleW (see #1232). - retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None) - if retval == 0: - raise IOError("WriteConsoleW failed with WinError: %s" % (WinError(get_last_error()),)) - if n.value == 0: - raise IOError("WriteConsoleW returned %r, n.value = 0" % (retval,)) - remaining -= n.value - if remaining == 0: break - text = text[n.value:] - except Exception as e: - _complain("%s.write: %r" % (self.name, e)) - raise - - def writelines(self, lines): - try: - for line in lines: - self.write(line) - except Exception as e: - _complain("%s.writelines: %r" % (self.name, e)) - raise - if real_stdout: - sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '') + sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '', _complain) else: - sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '') + sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '', _complain) if real_stderr: - sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '') + sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '', _complain) else: - sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '') + sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '', _complain) except Exception as e: _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,)) @@ -259,3 +198,74 @@ def initialize(): sys.argv = argv[-len(sys.argv):] if sys.argv[0].endswith('.pyscript'): sys.argv[0] = sys.argv[0][:-9] + + +class UnicodeOutput(object): + def __init__(self, hConsole, stream, fileno, name, _complain): + self._hConsole = hConsole + self._stream = stream + self._fileno = fileno + self.closed = False + self.softspace = False + self.mode = 'w' + self.encoding = 'utf-8' + self.name = name + + self._complain = _complain + + from allmydata.util.encodingutil import canonical_encoding + from allmydata.util import log + if hasattr(stream, 'encoding') and canonical_encoding(stream.encoding) != 'utf-8': + log.msg("%s: %r had encoding %r, but we're going to write UTF-8 to it" % + (name, stream, stream.encoding), level=log.CURIOUS) + self.flush() + + def isatty(self): + return False + def close(self): + # don't really close the handle, that would only cause problems + self.closed = True + def fileno(self): + return self._fileno + def flush(self): + if self._hConsole is None: + try: + self._stream.flush() + except Exception as e: + self._complain("%s.flush: %r from %r" % (self.name, e, self._stream)) + raise + + def write(self, text): + try: + if self._hConsole is None: + if isinstance(text, unicode): + text = text.encode('utf-8') + self._stream.write(text) + else: + if not isinstance(text, unicode): + text = str(text).decode('utf-8') + remaining = len(text) + while remaining > 0: + n = DWORD(0) + # There is a shorter-than-documented limitation on the + # length of the string passed to WriteConsoleW (see + # #1232). 
+ retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None) + if retval == 0: + raise IOError("WriteConsoleW failed with WinError: %s" % (WinError(get_last_error()),)) + if n.value == 0: + raise IOError("WriteConsoleW returned %r, n.value = 0" % (retval,)) + remaining -= n.value + if remaining == 0: break + text = text[n.value:] + except Exception as e: + self._complain("%s.write: %r" % (self.name, e)) + raise + + def writelines(self, lines): + try: + for line in lines: + self.write(line) + except Exception as e: + self._complain("%s.writelines: %r" % (self.name, e)) + raise From 14caaa360c0a2d09303513e267e1e792cb29c43d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:33:09 -0500 Subject: [PATCH 065/201] different debug --- src/allmydata/windows/fixups.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index d7c929de6..f69a840f8 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -146,17 +146,16 @@ def initialize(): real_stdout = (old_stdout_fileno == STDOUT_FILENO) real_stderr = (old_stderr_fileno == STDERR_FILENO) - print("real stdout: {}".format(real_stdout)) - print("real stderr: {}".format(real_stderr)) - if real_stdout: hStdout = GetStdHandle(STD_OUTPUT_HANDLE) if not_a_console(hStdout): + print("stdout not a console") real_stdout = False if real_stderr: hStderr = GetStdHandle(STD_ERROR_HANDLE) if not_a_console(hStderr): + print("stdout not a console") real_stderr = False if real_stdout or real_stderr: From 4e9bdfeee4963c4b3c2014ca744fa6bc76dcb3cb Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:38:25 -0500 Subject: [PATCH 066/201] please just always work? 
--- src/allmydata/windows/fixups.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index f69a840f8..df779cab0 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -158,16 +158,15 @@ def initialize(): print("stdout not a console") real_stderr = False - if real_stdout or real_stderr: - if real_stdout: - sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '', _complain) - else: - sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '', _complain) + if real_stdout: + sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '', _complain) + else: + sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '', _complain) - if real_stderr: - sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '', _complain) - else: - sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '', _complain) + if real_stderr: + sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '', _complain) + else: + sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '', _complain) except Exception as e: _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,)) From ab1f6f3a595e42f64204455cb4a7890f729b105f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:39:56 -0500 Subject: [PATCH 067/201] clean up this noise --- src/allmydata/windows/fixups.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index df779cab0..9510e5eb6 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -149,13 +149,11 @@ def initialize(): if real_stdout: hStdout = GetStdHandle(STD_OUTPUT_HANDLE) if not_a_console(hStdout): - print("stdout not a console") real_stdout = False if real_stderr: hStderr = GetStdHandle(STD_ERROR_HANDLE) if not_a_console(hStderr): - print("stdout not a console") real_stderr = False if real_stdout: From 112bfaf62597ad1bb4709921859ccc7fb49a4a23 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:42:27 -0500 Subject: [PATCH 068/201] we would like this to be utf-8 corresponding to the inputs --- src/allmydata/test/test_windows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 857d9d26c..625799bd2 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -209,8 +209,8 @@ class UnicodeOutputTests(SyncTestCase): str(ord(stdout_char)), str(ord(stderr_char)), ], stdout=PIPE, stderr=PIPE) - stdout = p.stdout.read() - stderr = p.stderr.read() + stdout = p.stdout.read().decode("utf-8") + stderr = p.stderr.read().decode("utf-8") returncode = p.wait() self.assertThat( From 1751d682a2c23213a2373fc7e9ba3cc9d48c5f31 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:46:40 -0500 Subject: [PATCH 069/201] is this cool? 
--- src/allmydata/test/test_windows.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 625799bd2..02646a32f 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -209,8 +209,8 @@ class UnicodeOutputTests(SyncTestCase): str(ord(stdout_char)), str(ord(stderr_char)), ], stdout=PIPE, stderr=PIPE) - stdout = p.stdout.read().decode("utf-8") - stderr = p.stderr.read().decode("utf-8") + stdout = p.stdout.read().decode("utf-8").replace("\r\n", "\n") + stderr = p.stderr.read().decode("utf-8").replace("\r\n", "\n") returncode = p.wait() self.assertThat( From ad2df670e61dbdee3f477f620c3cd614533a3949 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 11:51:01 -0500 Subject: [PATCH 070/201] try using pywin32 for GetStdHandle --- src/allmydata/windows/fixups.py | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 9510e5eb6..818551eb4 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -7,7 +7,14 @@ from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID # from win32api import ( + STD_OUTPUT_HANDLE, + STD_ERROR_HANDLE, SetErrorMode, + + # + # HANDLE WINAPI GetStdHandle(DWORD nStdHandle); + # returns INVALID_HANDLE_VALUE, NULL, or a valid handle + GetStdHandle, ) from win32con import ( SEM_FAILCRITICALERRORS, @@ -95,9 +102,6 @@ def initialize(): # and TZOmegaTZIOY # . try: - # - # HANDLE WINAPI GetStdHandle(DWORD nStdHandle); - # returns INVALID_HANDLE_VALUE, NULL, or a valid handle # # # DWORD WINAPI GetFileType(DWORD hFile); @@ -105,14 +109,6 @@ def initialize(): # # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); - GetStdHandle = WINFUNCTYPE( - HANDLE, DWORD, - use_last_error=True - )(("GetStdHandle", windll.kernel32)) - - STD_OUTPUT_HANDLE = DWORD(-11) - STD_ERROR_HANDLE = DWORD(-12) - GetFileType = WINFUNCTYPE( DWORD, DWORD, use_last_error=True From dc5ed668158848289e5fa27a5ef834793a4e3b8c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 12:31:08 -0500 Subject: [PATCH 071/201] docstring --- src/allmydata/windows/fixups.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 818551eb4..9249f9d80 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -193,6 +193,11 @@ def initialize(): class UnicodeOutput(object): + """ + ``UnicodeOutput`` is a file-like object that encodes unicode to UTF-8 and + writes it to another file or writes unicode natively to the Windows + console. + """ def __init__(self, hConsole, stream, fileno, name, _complain): self._hConsole = hConsole self._stream = stream From ed713182e77e7d92cf27c80da9c29e72df0f693c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 12:31:13 -0500 Subject: [PATCH 072/201] docstring --- src/allmydata/windows/fixups.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 9249f9d80..56a914c01 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -199,6 +199,20 @@ class UnicodeOutput(object): console. """ def __init__(self, hConsole, stream, fileno, name, _complain): + """ + :param hConsole: ``None`` or a handle on the console to which to write + unicode. 
Mutually exclusive with ``stream``. + + :param stream: ``None`` or a file-like object to which to write bytes. + + :param fileno: A result to hand back from method of the same name. + + :param name: A human-friendly identifier for this output object. + + :param _complain: A one-argument callable which accepts bytes to be + written when there's a problem. Care should be taken to not make + this do a write on this object. + """ self._hConsole = hConsole self._stream = stream self._fileno = fileno From fd223136db3f275c76673ab46c168d58ed60eec9 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 12:42:52 -0500 Subject: [PATCH 073/201] Avoid breaking non-Windows with test_windows --- src/allmydata/test/test_windows.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 02646a32f..8d8026584 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -69,10 +69,6 @@ from .common import ( SyncTestCase, ) -from ..windows.fixups import ( - get_argv, -) - slow_settings = settings( suppress_health_check=[HealthCheck.too_slow], deadline=None, @@ -87,9 +83,15 @@ class GetArgvTests(SyncTestCase): """ ``get_argv`` returns a list of unicode strings """ + # Hide the ``allmydata.windows.fixups.get_argv`` import here so it + # doesn't cause failures on non-Windows platforms. + from ..windows.fixups import ( + get_argv, + ) + argv = get_argv() + # We don't know what this process's command line was so we just make # structural assertions here. - argv = get_argv() self.assertThat( argv, MatchesAll( From 6de392fd23874703aa2e07283c891113000e01fc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 12:43:30 -0500 Subject: [PATCH 074/201] blacklist a couple more --- src/allmydata/test/test_python2_regressions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/allmydata/test/test_python2_regressions.py b/src/allmydata/test/test_python2_regressions.py index 84484f1cf..a3547ca27 100644 --- a/src/allmydata/test/test_python2_regressions.py +++ b/src/allmydata/test/test_python2_regressions.py @@ -16,6 +16,8 @@ from testtools.matchers import ( BLACKLIST = { "allmydata.test.check_load", "allmydata.windows.registry", + "allmydata.windows.fixups", + "allmydata.windows._win_subprocess", } From f5bcd272b8eae0fd48a92b1b6b6437cb9d783162 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 12:58:27 -0500 Subject: [PATCH 075/201] skip the other test suite too --- src/allmydata/test/test_windows.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 8d8026584..40ec4889e 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -172,6 +172,7 @@ class GetArgvTests(SyncTestCase): ) +@skipUnless(platform.isWindows(), "intended for Windows-only codepaths") class UnicodeOutputTests(SyncTestCase): """ Tests for writing unicode to stdout and stderr. 
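Taken together, the last three patches establish the pattern this series uses for Windows-only test code: keep the Windows-only modules out of the Python 2 port checks, guard the test classes with a platform skip, and defer any Windows-only import into the test body so that merely importing the test module never fails on other platforms. A minimal sketch of that pattern follows; the class and test names here are invented for illustration, and it assumes only the stdlib unittest module plus Twisted's platform check rather than the Tahoe-LAFS test helpers.

    from unittest import TestCase, skipUnless

    from twisted.python.runtime import platform


    @skipUnless(platform.isWindows(), "exercises Windows-only code paths")
    class WindowsOnlyExampleTests(TestCase):
        def test_get_argv_type(self):
            # Import inside the test body, not at module scope, so that
            # collecting this module never imports Windows-only code on
            # Linux or macOS; the class-level skip above means this body
            # only ever runs on Windows anyway.
            from allmydata.windows.fixups import get_argv
            self.assertIsInstance(get_argv(), list)
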
From f61103aa8009671ce6a6ada948e1e9ad160686ff Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 13:22:14 -0500 Subject: [PATCH 076/201] spell the module name right --- src/allmydata/test/test_python2_regressions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_python2_regressions.py b/src/allmydata/test/test_python2_regressions.py index a3547ca27..bb0cedad7 100644 --- a/src/allmydata/test/test_python2_regressions.py +++ b/src/allmydata/test/test_python2_regressions.py @@ -15,9 +15,9 @@ from testtools.matchers import ( BLACKLIST = { "allmydata.test.check_load", + "allmydata.test._win_subprocess", "allmydata.windows.registry", "allmydata.windows.fixups", - "allmydata.windows._win_subprocess", } From cca0071cbf0e4b874bd9777275d97a6d9636d4b2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 13:36:04 -0500 Subject: [PATCH 077/201] these aren't win32 specific --- src/allmydata/windows/fixups.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 56a914c01..e3e11a386 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -43,6 +43,9 @@ CommandLineToArgvW = WINFUNCTYPE( )(("CommandLineToArgvW", windll.shell32)) +STDOUT_FILENO = 1 +STDERR_FILENO = 2 + def get_argv(): """ :return [unicode]: The argument list this process was invoked with, as @@ -137,8 +140,6 @@ def initialize(): if hasattr(sys.stderr, 'fileno'): old_stderr_fileno = sys.stderr.fileno() - STDOUT_FILENO = 1 - STDERR_FILENO = 2 real_stdout = (old_stdout_fileno == STDOUT_FILENO) real_stderr = (old_stderr_fileno == STDERR_FILENO) From 5c6e5970c9358cf70d3569aa36ba3bf2673d568f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 13:36:12 -0500 Subject: [PATCH 078/201] get this from pywin32 too --- src/allmydata/windows/fixups.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index e3e11a386..31d7931dd 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -21,6 +21,10 @@ from win32con import ( SEM_NOOPENFILEERRORBOX, ) +from win32file import ( + INVALID_HANDLE_VALUE, +) + # # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, # LPDWORD lpCharsWritten, LPVOID lpReserved); @@ -125,8 +129,6 @@ def initialize(): use_last_error=True )(("GetConsoleMode", windll.kernel32)) - INVALID_HANDLE_VALUE = DWORD(-1).value - def not_a_console(handle): if handle == INVALID_HANDLE_VALUE or handle is None: return True From 184b9735b5d5a938d3f80036f2fd50ebef19c70c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 13:41:48 -0500 Subject: [PATCH 079/201] another constant we can get from pywin32 --- src/allmydata/windows/fixups.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 31d7931dd..a3ce24a6d 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -23,8 +23,12 @@ from win32con import ( from win32file import ( INVALID_HANDLE_VALUE, + FILE_TYPE_CHAR, ) +# This one not exposed by pywin32 as far as I can tell. 
+FILE_TYPE_REMOTE = 0x8000 + # # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, # LPDWORD lpCharsWritten, LPVOID lpReserved); @@ -121,8 +125,6 @@ def initialize(): use_last_error=True )(("GetFileType", windll.kernel32)) - FILE_TYPE_CHAR = 0x0002 - FILE_TYPE_REMOTE = 0x8000 GetConsoleMode = WINFUNCTYPE( BOOL, HANDLE, POINTER(DWORD), From 52896432e1628a8a0f5864c102d023914a720742 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 13:41:54 -0500 Subject: [PATCH 080/201] it cannot return None --- src/allmydata/windows/fixups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index a3ce24a6d..5d5f25985 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -132,7 +132,7 @@ def initialize(): )(("GetConsoleMode", windll.kernel32)) def not_a_console(handle): - if handle == INVALID_HANDLE_VALUE or handle is None: + if handle == INVALID_HANDLE_VALUE: return True return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR or GetConsoleMode(handle, byref(DWORD())) == 0) From ad48e6c00502be231e241980c51a00ccf0d36d7a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 13:54:04 -0500 Subject: [PATCH 081/201] See if we can use pywin32 GetFileType --- src/allmydata/windows/fixups.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 5d5f25985..45b69cd87 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -24,6 +24,10 @@ from win32con import ( from win32file import ( INVALID_HANDLE_VALUE, FILE_TYPE_CHAR, + + # + # DWORD WINAPI GetFileType(DWORD hFile); + GetFileType, ) # This one not exposed by pywin32 as far as I can tell. @@ -113,19 +117,10 @@ def initialize(): # and TZOmegaTZIOY # . try: - # - # - # DWORD WINAPI GetFileType(DWORD hFile); # # # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); - GetFileType = WINFUNCTYPE( - DWORD, DWORD, - use_last_error=True - )(("GetFileType", windll.kernel32)) - - GetConsoleMode = WINFUNCTYPE( BOOL, HANDLE, POINTER(DWORD), use_last_error=True From 9d7b12292c35e24ba170a6684f2c2ff66085efdf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 13:56:42 -0500 Subject: [PATCH 082/201] Get rid of FILE_TYPE_REMOTE --- src/allmydata/windows/fixups.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 45b69cd87..23b846360 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -30,9 +30,6 @@ from win32file import ( GetFileType, ) -# This one not exposed by pywin32 as far as I can tell. 
-FILE_TYPE_REMOTE = 0x8000 - # # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, # LPDWORD lpCharsWritten, LPVOID lpReserved); @@ -126,11 +123,15 @@ def initialize(): use_last_error=True )(("GetConsoleMode", windll.kernel32)) - def not_a_console(handle): + def a_console(handle): if handle == INVALID_HANDLE_VALUE: - return True - return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR - or GetConsoleMode(handle, byref(DWORD())) == 0) + return False + return ( + # It's a character file (eg a printer or a console) + GetFileType(handle) == FILE_TYPE_CHAR and + # Checking the console mode doesn't fail (thus it's a console) + GetConsoleMode(handle, byref(DWORD())) != 0 + ) old_stdout_fileno = None old_stderr_fileno = None @@ -144,12 +145,12 @@ def initialize(): if real_stdout: hStdout = GetStdHandle(STD_OUTPUT_HANDLE) - if not_a_console(hStdout): + if not a_console(hStdout): real_stdout = False if real_stderr: hStderr = GetStdHandle(STD_ERROR_HANDLE) - if not_a_console(hStderr): + if not a_console(hStderr): real_stderr = False if real_stdout: From e6ee13d11b63e30568be64f0621da54b49213b74 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 14:03:11 -0500 Subject: [PATCH 083/201] Shovel code around a bit more --- src/allmydata/windows/fixups.py | 68 +++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index 23b846360..c71b85681 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -1,6 +1,7 @@ from __future__ import print_function -done = False +import codecs, re +from functools import partial from ctypes import WINFUNCTYPE, windll, POINTER, c_int, WinError, byref, get_last_error from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID @@ -30,10 +31,22 @@ from win32file import ( GetFileType, ) +from allmydata.util import ( + log, +) + +# Keep track of whether `initialize` has run so we don't do any of the +# initialization more than once. +_done = False + +# +# pywin32 for Python 2.7 does not bind any of these *W variants so we do it +# ourselves. +# + # # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, # LPDWORD lpCharsWritten, LPVOID lpReserved); - WriteConsoleW = WINFUNCTYPE( BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID, use_last_error=True @@ -51,6 +64,13 @@ CommandLineToArgvW = WINFUNCTYPE( use_last_error=True )(("CommandLineToArgvW", windll.shell32)) +# +# BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); +GetConsoleMode = WINFUNCTYPE( + BOOL, HANDLE, POINTER(DWORD), + use_last_error=True +)(("GetConsoleMode", windll.kernel32)) + STDOUT_FILENO = 1 STDERR_FILENO = 2 @@ -80,16 +100,11 @@ def get_argv(): def initialize(): - global done + global _done import sys - if sys.platform != "win32" or done: + if sys.platform != "win32" or _done: return True - done = True - - import codecs, re - from functools import partial - - from allmydata.util import log + _done = True SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX) @@ -114,25 +129,6 @@ def initialize(): # and TZOmegaTZIOY # . 
try: - # - # - # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); - - GetConsoleMode = WINFUNCTYPE( - BOOL, HANDLE, POINTER(DWORD), - use_last_error=True - )(("GetConsoleMode", windll.kernel32)) - - def a_console(handle): - if handle == INVALID_HANDLE_VALUE: - return False - return ( - # It's a character file (eg a printer or a console) - GetFileType(handle) == FILE_TYPE_CHAR and - # Checking the console mode doesn't fail (thus it's a console) - GetConsoleMode(handle, byref(DWORD())) != 0 - ) - old_stdout_fileno = None old_stderr_fileno = None if hasattr(sys.stdout, 'fileno'): @@ -193,6 +189,20 @@ def initialize(): sys.argv[0] = sys.argv[0][:-9] +def a_console(handle): + """ + :return: ``True`` if ``handle`` refers to a console, ``False`` otherwise. + """ + if handle == INVALID_HANDLE_VALUE: + return False + return ( + # It's a character file (eg a printer or a console) + GetFileType(handle) == FILE_TYPE_CHAR and + # Checking the console mode doesn't fail (thus it's a console) + GetConsoleMode(handle, byref(DWORD())) != 0 + ) + + class UnicodeOutput(object): """ ``UnicodeOutput`` is a file-like object that encodes unicode to UTF-8 and From a29b061f917e7a7b80383b14e44a5fc64320da96 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 14:04:20 -0500 Subject: [PATCH 084/201] explain the nested import --- src/allmydata/test/test_windows.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 40ec4889e..eb9321bfc 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -125,6 +125,13 @@ class GetArgvTests(SyncTestCase): ``get_argv`` returns a list representing the result of tokenizing the "command line" argument string provided to Windows processes. """ + # Python 2.7 doesn't have good options for launching a process with + # non-ASCII in its command line. So use this alternative that does a + # better job. Bury the import here because it only works on Windows. + from ._win_subprocess import ( + Popen + ) + working_path = FilePath(self.mktemp()) working_path.makedirs() save_argv_path = working_path.child("script.py") @@ -144,11 +151,6 @@ class GetArgvTests(SyncTestCase): f.write(json.dumps(get_argv())) """.format(saved_argv_path.path)), ) - # Python 2.7 doesn't have good options for launching a process with - # non-ASCII in its command line. - from ._win_subprocess import ( - Popen - ) argv = [executable.decode("utf-8"), save_argv_path.path] + argv p = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE) p.stdin.close() From a4c520ec2a62adfe58a813c6893533e4e4a6aa5f Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 14:11:28 -0500 Subject: [PATCH 085/201] try to go faster without losing coverage --- src/allmydata/test/test_windows.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index eb9321bfc..2404542fa 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -72,6 +72,13 @@ from .common import ( slow_settings = settings( suppress_health_check=[HealthCheck.too_slow], deadline=None, + + # Reduce the number of examples required to consider the test a success. + # The default is 100. Launching a process is expensive so we'll try to do + # it as few times as we can get away with. 
To maintain good coverage, + # we'll try to pass as much data to each process as we can so we're still + # covering a good portion of the space. + max_examples=10, ) @skipUnless(platform.isWindows(), "get_argv is Windows-only") @@ -113,11 +120,11 @@ class GetArgvTests(SyncTestCase): # our generated arguments. blacklist_characters=('\x00',), ), - min_size=1, - max_size=4, + min_size=10, + max_size=20, ), - min_size=1, - max_size=4, + min_size=10, + max_size=20, ), ) def test_argv_values(self, argv): From 41d754852722d6405d57083bd3fa82556b6694a0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 14:16:02 -0500 Subject: [PATCH 086/201] typo --- src/allmydata/test/test_windows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index 2404542fa..f2c1318c5 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -206,7 +206,7 @@ class UnicodeOutputTests(SyncTestCase): initialize() # XXX A shortcoming of the monkey-patch approach is that you'd - # better not iport stdout or stderr before you call initialize. + # better not import stdout or stderr before you call initialize. from sys import argv, stdout, stderr stdout.write(chr(int(argv[1]))) From 73110f48da854b00ea2c3cb25c15152ed584ab04 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 14:56:46 -0500 Subject: [PATCH 087/201] Banish getProcessOutputAndValue from test_runner It cannot do the right thing on Windows for non-ASCII because Twisted uses pywin32 and on Python 2 pywin32 binds CreateProcessA. --- src/allmydata/test/test_runner.py | 147 +++++++++++++----------------- src/allmydata/test/test_system.py | 22 ++++- 2 files changed, 85 insertions(+), 84 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index ef2b99a19..64afca939 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -5,6 +5,14 @@ from __future__ import ( import os.path, re, sys from os import linesep +from subprocess import ( + PIPE, + Popen, +) + +from eliot import ( + log_call, +) from twisted.trial import unittest @@ -19,7 +27,6 @@ from twisted.python.runtime import ( platform, ) from allmydata.util import fileutil, pollmixin -from allmydata.util.encodingutil import unicode_to_argv, unicode_to_output from allmydata.test import common_util import allmydata from .common_util import parse_cli, run_cli @@ -29,12 +36,8 @@ from .cli_node_api import ( on_stdout, on_stdout_and_stderr, ) -from ._twisted_9607 import ( - getProcessOutputAndValue, -) from ..util.eliotutil import ( inline_callbacks, - log_call_deferred, ) def get_root_from_file(src): @@ -54,93 +57,72 @@ srcfile = allmydata.__file__ rootdir = get_root_from_file(srcfile) -class RunBinTahoeMixin(object): - @log_call_deferred(action_type="run-bin-tahoe") - def run_bintahoe(self, args, stdin=None, python_options=[], env=None): - command = sys.executable - argv = python_options + ["-m", "allmydata.scripts.runner"] + args +@log_call(action_type="run-bin-tahoe") +def run_bintahoe(extra_argv): + """ + Run the main Tahoe entrypoint in a child process with the given additional + arguments. - if env is None: - env = os.environ + :param [unicode] extra_argv: More arguments for the child process argv. 
- d = getProcessOutputAndValue(command, argv, env, stdinBytes=stdin) - def fix_signal(result): - # Mirror subprocess.Popen.returncode structure - (out, err, signal) = result - return (out, err, -signal) - d.addErrback(fix_signal) - return d + :return: A three-tuple of stdout (unicode), stderr (unicode), and the + child process "returncode" (int). + """ + argv = [sys.executable, u"-m", u"allmydata.scripts.runner"] + extra_argv + p = Popen(argv, stdout=PIPE, stderr=PIPE) + out = p.stdout.read().decode("utf-8") + err = p.stderr.read().decode("utf-8") + returncode = p.wait() + return (out, err, returncode) -class BinTahoe(common_util.SignalMixin, unittest.TestCase, RunBinTahoeMixin): +class BinTahoe(common_util.SignalMixin, unittest.TestCase): def test_unicode_arguments_and_output(self): + """ + The runner script receives unmangled non-ASCII values in argv. + """ tricky = u"\u2621" - try: - tricky_arg = unicode_to_argv(tricky, mangle=True) - tricky_out = unicode_to_output(tricky) - except UnicodeEncodeError: - raise unittest.SkipTest("A non-ASCII argument/output could not be encoded on this platform.") + out, err, returncode = run_bintahoe([tricky]) + self.assertEqual(returncode, 1) + self.assertIn(u"Unknown command: " + tricky, out) - d = self.run_bintahoe([tricky_arg]) - def _cb(res): - out, err, rc_or_sig = res - self.failUnlessEqual(rc_or_sig, 1, str(res)) - self.failUnlessIn("Unknown command: "+tricky_out, out) - d.addCallback(_cb) - return d - - def test_run_with_python_options(self): - # -t is a harmless option that warns about tabs. - d = self.run_bintahoe(["--version"], python_options=["-t"]) - def _cb(res): - out, err, rc_or_sig = res - self.assertEqual(rc_or_sig, 0, str(res)) - self.assertTrue(out.startswith(allmydata.__appname__ + '/'), str(res)) - d.addCallback(_cb) - return d - - @inlineCallbacks def test_help_eliot_destinations(self): - out, err, rc_or_sig = yield self.run_bintahoe(["--help-eliot-destinations"]) - self.assertIn("\tfile:", out) - self.assertEqual(rc_or_sig, 0) + out, err, returncode = run_bintahoe([u"--help-eliot-destinations"]) + self.assertIn(u"\tfile:", out) + self.assertEqual(returncode, 0) - @inlineCallbacks def test_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ + out, err, returncode = run_bintahoe([ # Proves little but maybe more than nothing. - "--eliot-destination=file:-", + u"--eliot-destination=file:-", # Throw in *some* command or the process exits with error, making # it difficult for us to see if the previous arg was accepted or # not. 
- "--help", + u"--help", ]) - self.assertEqual(rc_or_sig, 0) + self.assertEqual(returncode, 0) - @inlineCallbacks def test_unknown_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ - "--eliot-destination=invalid:more", + out, err, returncode = run_bintahoe([ + u"--eliot-destination=invalid:more", ]) - self.assertEqual(1, rc_or_sig) - self.assertIn("Unknown destination description", out) - self.assertIn("invalid:more", out) + self.assertEqual(1, returncode) + self.assertIn(u"Unknown destination description", out) + self.assertIn(u"invalid:more", out) - @inlineCallbacks def test_malformed_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ - "--eliot-destination=invalid", + out, err, returncode = run_bintahoe([ + u"--eliot-destination=invalid", ]) - self.assertEqual(1, rc_or_sig) - self.assertIn("must be formatted like", out) + self.assertEqual(1, returncode) + self.assertIn(u"must be formatted like", out) - @inlineCallbacks def test_escape_in_eliot_destination(self): - out, err, rc_or_sig = yield self.run_bintahoe([ - "--eliot-destination=file:@foo", + out, err, returncode = run_bintahoe([ + u"--eliot-destination=file:@foo", ]) - self.assertEqual(1, rc_or_sig) - self.assertIn("Unsupported escape character", out) + self.assertEqual(1, returncode) + self.assertIn(u"Unsupported escape character", out) class CreateNode(unittest.TestCase): @@ -250,8 +232,7 @@ class CreateNode(unittest.TestCase): ) -class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, - RunBinTahoeMixin): +class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin): """ exercise "tahoe run" for both introducer and client node, by spawning "tahoe run" as a subprocess. This doesn't get us line-level coverage, but @@ -271,18 +252,18 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, The introducer furl is stable across restarts. """ basedir = self.workdir("test_introducer") - c1 = os.path.join(basedir, "c1") + c1 = os.path.join(basedir, u"c1") tahoe = CLINodeAPI(reactor, FilePath(c1)) self.addCleanup(tahoe.stop_and_wait) - out, err, rc_or_sig = yield self.run_bintahoe([ - "--quiet", - "create-introducer", - "--basedir", c1, - "--hostname", "127.0.0.1", + out, err, returncode = run_bintahoe([ + u"--quiet", + u"create-introducer", + u"--basedir", c1, + u"--hostname", u"127.0.0.1", ]) - self.assertEqual(rc_or_sig, 0) + self.assertEqual(returncode, 0) # This makes sure that node.url is written, which allows us to # detect when the introducer restarts in _node_has_restarted below. @@ -350,18 +331,18 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin, 3) Verify that the pid file is removed after SIGTERM (on POSIX). """ basedir = self.workdir("test_client") - c1 = os.path.join(basedir, "c1") + c1 = os.path.join(basedir, u"c1") tahoe = CLINodeAPI(reactor, FilePath(c1)) # Set this up right now so we don't forget later. self.addCleanup(tahoe.cleanup) - out, err, rc_or_sig = yield self.run_bintahoe([ - "--quiet", "create-node", "--basedir", c1, - "--webport", "0", - "--hostname", "localhost", + out, err, returncode = run_bintahoe([ + u"--quiet", u"create-node", u"--basedir", c1, + u"--webport", u"0", + u"--hostname", u"localhost", ]) - self.failUnlessEqual(rc_or_sig, 0) + self.failUnlessEqual(returncode, 0) # Check that the --webport option worked. 
config = fileutil.read(tahoe.config_file.path) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 235361cf8..75219004b 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -51,6 +51,10 @@ from twisted.python.filepath import ( FilePath, ) +from ._twisted_9607 import ( + getProcessOutputAndValue, +) + from .common import ( TEST_RSA_KEY_SIZE, SameProcessStreamEndpointAssigner, @@ -61,13 +65,29 @@ from .web.common import ( ) # TODO: move this to common or common_util -from allmydata.test.test_runner import RunBinTahoeMixin from . import common_util as testutil from .common_util import run_cli_unicode from ..scripts.common import ( write_introducer, ) +class RunBinTahoeMixin(object): + def run_bintahoe(self, args, stdin=None, python_options=[], env=None): + command = sys.executable + argv = python_options + ["-m", "allmydata.scripts.runner"] + args + + if env is None: + env = os.environ + + d = getProcessOutputAndValue(command, argv, env, stdinBytes=stdin) + def fix_signal(result): + # Mirror subprocess.Popen.returncode structure + (out, err, signal) = result + return (out, err, -signal) + d.addErrback(fix_signal) + return d + + def run_cli(*args, **kwargs): """ Run a Tahoe-LAFS CLI utility, but inline. From c6d108ddb25b367b041d97a6110a0666d9307062 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:07:37 -0500 Subject: [PATCH 088/201] Make test_runner and test_windows both use the good Popen --- src/allmydata/test/common.py | 22 ++++++++++++++++++++++ src/allmydata/test/test_runner.py | 13 ++++++++----- src/allmydata/test/test_windows.py | 14 ++------------ 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index f1dbf651d..a49a79ec0 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -9,6 +9,10 @@ __all__ = [ "flush_logged_errors", "skip", "skipIf", + + # Selected based on platform and re-exported for convenience. + "Popen", + "PIPE", ] from past.builtins import chr as byteschr, unicode @@ -48,6 +52,9 @@ from testtools.twistedsupport import ( flush_logged_errors, ) +from twisted.python.runtime import ( + platform, +) from twisted.application import service from twisted.plugin import IPlugin from twisted.internet import defer @@ -101,6 +108,21 @@ from .eliotutil import ( ) from .common_util import ShouldFailMixin # noqa: F401 +if platform.isWindows(): + # Python 2.7 doesn't have good options for launching a process with + # non-ASCII in its command line. So use this alternative that does a + # better job. However, only use it on Windows because it doesn't work + # anywhere else. 
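+    # Tests that need to spawn a child process with non-ASCII arguments
+    # should import ``Popen`` and ``PIPE`` from this module rather than from
+    # ``subprocess`` directly, so the platform-appropriate implementation is
+    # picked up automatically.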
+ from ._win_subprocess import ( + PIPE, + Popen, + ) +else: + from subprocess import ( + PIPE, + Popen, + ) + TEST_RSA_KEY_SIZE = 522 diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 64afca939..c98d4e376 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -5,10 +5,6 @@ from __future__ import ( import os.path, re, sys from os import linesep -from subprocess import ( - PIPE, - Popen, -) from eliot import ( log_call, @@ -29,7 +25,14 @@ from twisted.python.runtime import ( from allmydata.util import fileutil, pollmixin from allmydata.test import common_util import allmydata -from .common_util import parse_cli, run_cli +from .common import ( + PIPE, + Popen, +) +from .common_util import ( + parse_cli, + run_cli, +) from .cli_node_api import ( CLINodeAPI, Expect, diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py index f2c1318c5..01e4a57c1 100644 --- a/src/allmydata/test/test_windows.py +++ b/src/allmydata/test/test_windows.py @@ -29,11 +29,6 @@ from json import ( from textwrap import ( dedent, ) -from subprocess import ( - PIPE, - Popen, -) - from twisted.python.filepath import ( FilePath, ) @@ -66,6 +61,8 @@ from hypothesis.strategies import ( ) from .common import ( + PIPE, + Popen, SyncTestCase, ) @@ -132,13 +129,6 @@ class GetArgvTests(SyncTestCase): ``get_argv`` returns a list representing the result of tokenizing the "command line" argument string provided to Windows processes. """ - # Python 2.7 doesn't have good options for launching a process with - # non-ASCII in its command line. So use this alternative that does a - # better job. Bury the import here because it only works on Windows. - from ._win_subprocess import ( - Popen - ) - working_path = FilePath(self.mktemp()) working_path.makedirs() save_argv_path = working_path.child("script.py") From 834abfe6bf8736d5c96477aca7028a297e689717 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:09:25 -0500 Subject: [PATCH 089/201] _win_subprocess didn't actually export this --- src/allmydata/test/common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index a49a79ec0..2b1adebd9 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -114,15 +114,15 @@ if platform.isWindows(): # better job. However, only use it on Windows because it doesn't work # anywhere else. from ._win_subprocess import ( - PIPE, Popen, ) else: from subprocess import ( - PIPE, Popen, ) - +from subprocess import ( + PIPE, +) TEST_RSA_KEY_SIZE = 522 From 5df86b46084355cdc92d875e527f9ca08281d0a1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:26:12 -0500 Subject: [PATCH 090/201] restore test_with_python_options now that I see what it's testing --- src/allmydata/test/test_runner.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index c98d4e376..ad03bd391 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -61,7 +61,7 @@ rootdir = get_root_from_file(srcfile) @log_call(action_type="run-bin-tahoe") -def run_bintahoe(extra_argv): +def run_bintahoe(extra_argv, python_options=None): """ Run the main Tahoe entrypoint in a child process with the given additional arguments. 
@@ -71,7 +71,11 @@ def run_bintahoe(extra_argv):
    :return: A three-tuple of stdout (unicode), stderr (unicode), and the
        child process "returncode" (int).
    """
-    argv = [sys.executable, u"-m", u"allmydata.scripts.runner"] + extra_argv
+    argv = [sys.executable]
+    if python_options is not None:
+        argv.extend(python_options)
+    argv.extend([u"-m", u"allmydata.scripts.runner"])
+    argv.extend(extra_argv)
     p = Popen(argv, stdout=PIPE, stderr=PIPE)
     out = p.stdout.read().decode("utf-8")
     err = p.stderr.read().decode("utf-8")
@@ -89,6 +93,21 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase):
         self.assertEqual(returncode, 1)
         self.assertIn(u"Unknown command: " + tricky, out)

+    def test_with_python_options(self):
+        """
+        Additional options for the Python interpreter don't prevent the runner
+        script from receiving the arguments meant for it.
+        """
+        # This seems like a redundant test for someone else's functionality
+        # but on Windows we parse the whole command line string ourselves so
+        # we have to have our own implementation of skipping these options.
+
+        # -t is a harmless option that warns about tabs so we can add it
+        # without impacting other behavior noticeably.
+        out, err, returncode = run_bintahoe(["--version"], python_options=["-t"])
+        self.assertEqual(returncode, 0)
+        self.assertTrue(out.startswith(allmydata.__appname__ + '/'))
+
     def test_help_eliot_destinations(self):
         out, err, returncode = run_bintahoe([u"--help-eliot-destinations"])
         self.assertIn(u"\tfile:", out)

From b81d57779a1b9a3a09c6de44e9d3e68d56eaed63 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Mon, 11 Jan 2021 15:29:12 -0500
Subject: [PATCH 091/201] Tahoe's .pyscript is ancient history

---
 src/allmydata/windows/fixups.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py
index c71b85681..237f96ca5 100644
--- a/src/allmydata/windows/fixups.py
+++ b/src/allmydata/windows/fixups.py
@@ -185,8 +185,6 @@ def initialize():
     # as in the case of a frozen executable created by bb-freeze or similar.
     sys.argv = argv[-len(sys.argv):]

-    if sys.argv[0].endswith('.pyscript'):
-        sys.argv[0] = sys.argv[0][:-9]


 def a_console(handle):

From 1639aef197db54a3522293f16ed0acc34e0e955f Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Mon, 11 Jan 2021 15:29:32 -0500
Subject: [PATCH 092/201] Get rid of the argv unmangling that we no longer do

---
 src/allmydata/windows/fixups.py | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py
index 237f96ca5..d34404aed 100644
--- a/src/allmydata/windows/fixups.py
+++ b/src/allmydata/windows/fixups.py
@@ -161,29 +161,13 @@ def initialize():
     except Exception as e:
         _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,))

-    # This works around .
-
-    # Because of (and similar limitations in
-    # twisted), the 'bin/tahoe' script cannot invoke us with the actual Unicode arguments.
-    # Instead it "mangles" or escapes them using \x7F as an escape character, which we
-    # unescape here.
- def unmangle(s): - return re.sub(u'\\x7F[0-9a-fA-F]*\\;', lambda m: unichr(int(m.group(0)[1:-1], 16)), s) - - argv_unicode = get_argv() - try: - argv = [unmangle(argv_u).encode('utf-8') for argv_u in argv_unicode] - except Exception as e: - _complain("%s: could not unmangle Unicode arguments.\n%r" - % (sys.argv[0], argv_unicode)) - raise + argv = list(arg.encode("utf-8") for arg in get_argv()) # Take only the suffix with the same number of arguments as sys.argv. # This accounts for anything that can cause initial arguments to be stripped, # for example, the Python interpreter or any options passed to it, or runner # scripts such as 'coverage run'. It works even if there are no such arguments, # as in the case of a frozen executable created by bb-freeze or similar. - sys.argv = argv[-len(sys.argv):] From 2306819db1829da1eb839fa38da8b2dd4cf4970a Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 15:45:39 -0500 Subject: [PATCH 093/201] Get rid of unicode_to_argv and argv_to_unicode --- src/allmydata/scripts/cli.py | 42 ++++++++++++------------- src/allmydata/scripts/create_node.py | 6 ++-- src/allmydata/test/cli/common.py | 5 +-- src/allmydata/test/cli/test_backup.py | 5 +-- src/allmydata/test/cli/test_put.py | 7 +++-- src/allmydata/test/common_util.py | 2 +- src/allmydata/test/test_encodingutil.py | 35 ++------------------- src/allmydata/test/test_system.py | 4 +-- src/allmydata/util/encodingutil.py | 38 +--------------------- 9 files changed, 42 insertions(+), 102 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 379e1d212..bad96a252 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -4,7 +4,7 @@ import os.path, re, fnmatch from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ DEFAULT_ALIAS, BaseOptions -from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_local_unicode_path +from allmydata.util.encodingutil import argv_to_abspath, quote_local_unicode_path from .tahoe_status import TahoeStatusCommand NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") @@ -55,7 +55,7 @@ class MakeDirectoryOptions(FileStoreOptions): ] def parseArgs(self, where=""): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") if self['format']: if self['format'].upper() not in ("SDMF", "MDMF"): @@ -66,7 +66,7 @@ class MakeDirectoryOptions(FileStoreOptions): class AddAliasOptions(FileStoreOptions): def parseArgs(self, alias, cap): - self.alias = argv_to_unicode(alias) + self.alias = unicode(alias, "utf-8") if self.alias.endswith(u':'): self.alias = self.alias[:-1] self.cap = cap @@ -76,7 +76,7 @@ class AddAliasOptions(FileStoreOptions): class CreateAliasOptions(FileStoreOptions): def parseArgs(self, alias): - self.alias = argv_to_unicode(alias) + self.alias = unicode(alias, "utf-8") if self.alias.endswith(u':'): self.alias = self.alias[:-1] @@ -100,7 +100,7 @@ class ListOptions(FileStoreOptions): ("json", None, "Show the raw JSON output."), ] def parseArgs(self, where=""): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [PATH]" @@ -142,7 +142,7 @@ class GetOptions(FileStoreOptions): if arg2 == "-": arg2 = None - self.from_file = argv_to_unicode(arg1) + self.from_file = unicode(arg1, "utf-8") self.to_file = None if arg2 is None else argv_to_abspath(arg2) synopsis = "[options] REMOTE_FILE LOCAL_FILE" @@ -175,7 +175,7 @@ class PutOptions(FileStoreOptions): arg1 = None 
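+        # A from_file of None means the file data will be read from stdin.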
self.from_file = None if arg1 is None else argv_to_abspath(arg1) - self.to_file = None if arg2 is None else argv_to_unicode(arg2) + self.to_file = None if arg2 is None else unicode(arg2, "utf-8") if self['format']: if self['format'].upper() not in ("SDMF", "MDMF", "CHK"): @@ -218,8 +218,8 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = map(argv_to_unicode, args[:-1]) - self.destination = argv_to_unicode(args[-1]) + self.sources = list(unicode(a, "utf-8") for a in args[:-1]) + self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. TO" @@ -255,15 +255,15 @@ class CpOptions(FileStoreOptions): class UnlinkOptions(FileStoreOptions): def parseArgs(self, where): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] REMOTE_FILE" description = "Remove a named file from its parent directory." class MvOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = argv_to_unicode(frompath) - self.to_file = argv_to_unicode(topath) + self.from_file = unicode(frompath, "utf-8") + self.to_file = unicode(topath, "utf-8") synopsis = "[options] FROM TO" @@ -281,8 +281,8 @@ class MvOptions(FileStoreOptions): class LnOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = argv_to_unicode(frompath) - self.to_file = argv_to_unicode(topath) + self.from_file = unicode(frompath, "utf-8") + self.to_file = unicode(topath, "utf-8") synopsis = "[options] FROM_LINK TO_LINK" @@ -328,14 +328,14 @@ class BackupOptions(FileStoreOptions): def parseArgs(self, localdir, topath): self.from_dir = argv_to_abspath(localdir) - self.to_dir = argv_to_unicode(topath) + self.to_dir = unicode(topath, "utf-8") synopsis = "[options] FROM ALIAS:TO" def opt_exclude(self, pattern): """Ignore files matching a glob pattern. 
You may give multiple '--exclude' options.""" - g = argv_to_unicode(pattern).strip() + g = unicode(pattern, "utf-8").strip() if g: exclude = self['exclude'] exclude.add(g) @@ -385,7 +385,7 @@ class WebopenOptions(FileStoreOptions): ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" @@ -402,7 +402,7 @@ class ManifestOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed."), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" description = """ @@ -414,7 +414,7 @@ class StatsOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = argv_to_unicode(where) + self.where = unicode(where, "utf-8") synopsis = "[options] [ALIAS:PATH]" description = """ @@ -429,7 +429,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = map(argv_to_unicode, locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = map(argv_to_unicode, locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index ac17cf445..ed4f0c71d 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -16,7 +16,7 @@ from allmydata.scripts.common import ( ) from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.assertutil import precondition -from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_local_unicode_path, get_io_encoding +from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path, get_io_encoding from allmydata.util import fileutil, i2p_provider, iputil, tor_provider from wormhole import wormhole @@ -238,7 +238,7 @@ def write_node_config(c, config): c.write("\n") c.write("[node]\n") - nickname = argv_to_unicode(config.get("nickname") or "") + nickname = unicode(config.get("nickname") or "", "utf-8") c.write("nickname = %s\n" % (nickname.encode('utf-8'),)) if config["hide-ip"]: c.write("reveal-IP-address = false\n") @@ -246,7 +246,7 @@ def write_node_config(c, config): c.write("reveal-IP-address = true\n") # TODO: validate webport - webport = argv_to_unicode(config.get("webport") or "none") + webport = unicode(config.get("webport") or "none", "utf-8") if webport.lower() == "none": webport = "" c.write("web.port = %s\n" % (webport.encode('utf-8'),)) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index bf175de44..13445ef0a 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,4 +1,5 @@ -from ...util.encodingutil import unicode_to_argv +from six import ensure_str + from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -45,6 +46,6 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. 
client_num = kwargs.pop("client_num", 0) - client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) + client_dir = ensure_str(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_backup.py b/src/allmydata/test/cli/test_backup.py index ceecbd662..6aecd0af6 100644 --- a/src/allmydata/test/cli/test_backup.py +++ b/src/allmydata/test/cli/test_backup.py @@ -1,4 +1,5 @@ import os.path +from six import ensure_str from six.moves import cStringIO as StringIO from datetime import timedelta import re @@ -9,7 +10,7 @@ from twisted.python.monkey import MonkeyPatcher import __builtin__ from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.namespace import Namespace from allmydata.scripts import cli, backupdb from ..common_util import StallMixin @@ -413,7 +414,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): return StringIO() patcher = MonkeyPatcher((__builtin__, 'file', call_file)) - patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to']) + patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', ensure_str(exclude_file), 'from', 'to']) self.failUnless(ns.called) def test_ignore_symlinks(self): diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 08a66f98d..2deafb784 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,4 +1,7 @@ import os.path + +from six import ensure_str + from twisted.trial import unittest from twisted.python import usage @@ -7,7 +10,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin @@ -47,7 +50,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid(oneshare=True) rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) + abs_fn = ensure_str(abspath_expanduser_unicode(unicode(rel_fn))) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") d = self.do_cli("put", rel_fn) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 2a70cff3a..7b3194d3f 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -76,7 +76,7 @@ def run_cli_native(verb, *args, **kwargs): encoding = kwargs.pop("encoding", None) precondition( all(isinstance(arg, native_str) for arg in [verb] + nodeargs + list(args)), - "arguments to run_cli must be a native string -- convert using unicode_to_argv", + "arguments to run_cli must be a native string -- convert using UTF-8", verb=verb, args=args, nodeargs=nodeargs, diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index cbc9143b7..5f6700cd6 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -81,12 +81,12 @@ 
from allmydata.test.common_util import ( ReallyEqualMixin, skip_if_cannot_represent_filename, ) from allmydata.util import encodingutil, fileutil -from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ +from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ - to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ - unicode_to_argv + to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from + from twisted.python import usage @@ -138,12 +138,6 @@ class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): _reload() self.assertEqual(get_io_encoding(), 'utf-8') - def test_argv_to_unicode(self): - encodingutil.io_encoding = 'utf-8' - self.failUnlessRaises(usage.UsageError, - argv_to_unicode, - lumiere_nfc.encode('latin1')) - @skipIf(PY3, "Python 2 only.") def test_unicode_to_output(self): encodingutil.io_encoding = 'koi8-r' @@ -213,19 +207,6 @@ class EncodingUtil(ReallyEqualMixin): sys.platform = self.original_platform _reload() - def test_argv_to_unicode(self): - if 'argv' not in dir(self): - return - - mock_stdout = MockStdout() - mock_stdout.encoding = self.io_encoding - self.patch(sys, 'stdout', mock_stdout) - - argu = lumiere_nfc - argv = self.argv - _reload() - self.failUnlessReallyEqual(argv_to_unicode(argv), argu) - def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") @@ -245,16 +226,6 @@ class EncodingUtil(ReallyEqualMixin): def test_unicode_to_output_py3(self): self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) - @skipIf(PY3, "Python 2 only.") - def test_unicode_to_argv_py2(self): - """unicode_to_argv() converts to bytes on Python 2.""" - self.assertEqual(unicode_to_argv("abc"), u"abc".encode(self.io_encoding)) - - @skipIf(PY2, "Python 3 only.") - def test_unicode_to_argv_py3(self): - """unicode_to_argv() is noop on Python 3.""" - self.assertEqual(unicode_to_argv("abc"), "abc") - @skipIf(PY3, "Python 3 only.") def test_unicode_platform_py2(self): matrix = { diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 75219004b..03b9ba2de 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -35,7 +35,7 @@ from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.util import idlib, mathutil, pollmixin, fileutil from allmydata.util import log, base32 -from allmydata.util.encodingutil import quote_output, unicode_to_argv +from allmydata.util.encodingutil import quote_output from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data from allmydata.interfaces import IDirectoryNode, IFileNode, \ @@ -2185,7 +2185,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): log.msg("test_system.SystemTest._test_runner using %r" % filename) rc,output,err = yield run_cli("debug", "dump-share", "--offsets", - unicode_to_argv(filename)) + ensure_str(filename)) self.failUnlessEqual(rc, 0) # we only upload a single file, so we can assert some things about diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index f13dc5b8e..5cc3b8d19 100644 --- a/src/allmydata/util/encodingutil.py +++ 
b/src/allmydata/util/encodingutil.py @@ -107,53 +107,17 @@ def get_io_encoding(): """ return io_encoding -def argv_to_unicode(s): - """ - Decode given argv element to unicode. If this fails, raise a UsageError. - """ - if isinstance(s, unicode): - return s - - precondition(isinstance(s, bytes), s) - - try: - return unicode(s, io_encoding) - except UnicodeDecodeError: - raise usage.UsageError("Argument %s cannot be decoded as %s." % - (quote_output(s), io_encoding)) - def argv_to_abspath(s, **kwargs): """ Convenience function to decode an argv element to an absolute path, with ~ expanded. If this fails, raise a UsageError. """ - decoded = argv_to_unicode(s) + decoded = unicode(s, "utf-8") if decoded.startswith(u'-'): raise usage.UsageError("Path argument %s cannot start with '-'.\nUse %s if you intended to refer to a file." % (quote_output(s), quote_output(os.path.join('.', s)))) return abspath_expanduser_unicode(decoded, **kwargs) -def unicode_to_argv(s, mangle=False): - """ - Encode the given Unicode argument as a bytestring. - If the argument is to be passed to a different process, then the 'mangle' argument - should be true; on Windows, this uses a mangled encoding that will be reversed by - code in runner.py. - - On Python 3, just return the string unchanged, since argv is unicode. - """ - precondition(isinstance(s, unicode), s) - if PY3: - warnings.warn("This will be unnecessary once Python 2 is dropped.", - DeprecationWarning) - return s - - if mangle and sys.platform == "win32": - # This must be the same as 'mangle' in bin/tahoe-script.template. - return bytes(re.sub(u'[^\\x20-\\x7F]', lambda m: u'\x7F%x;' % (ord(m.group(0)),), s), io_encoding) - else: - return s.encode(io_encoding) - def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes. From 260706d33015a3df608db94d9fbd0de4cea481cc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 16:00:42 -0500 Subject: [PATCH 094/201] Fix the collision with the builtin list --- src/allmydata/scripts/cli.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index bad96a252..310cb20fc 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,4 +1,5 @@ from __future__ import print_function +import __builtin__ import os.path, re, fnmatch from twisted.python import usage @@ -218,7 +219,7 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = list(unicode(a, "utf-8") for a in args[:-1]) + self.sources = __builtin__.list(unicode(a, "utf-8") for a in args[:-1]) self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. 
TO" @@ -429,7 +430,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +447,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ From b8abec607335f3b4242f44ea7bf832cc7df8bef6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 16:00:48 -0500 Subject: [PATCH 095/201] Get rid of the Latin-1 case Here's a supposition: UTF-8 or bust --- src/allmydata/test/cli/test_alias.py | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 72b634608..07f42b29d 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -99,22 +99,6 @@ class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): ) - def test_list_latin_1(self): - """ - An alias composed of all Latin-1-encodeable code points can be created - when the active encoding is Latin-1. - - This is very similar to ``test_list_utf_8`` but the assumption of - UTF-8 is nearly ubiquitous and explicitly exercising the codepaths - with a UTF-8-incompatible encoding helps flush out unintentional UTF-8 - assumptions. - """ - return self._check_create_alias( - u"taho\N{LATIN SMALL LETTER E WITH ACUTE}", - encoding="latin-1", - ) - - def test_list_utf_8(self): """ An alias composed of all UTF-8-encodeable code points can be created when From ec6c036f87fb74937cc4a246fcb4916575809ba6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 16:14:34 -0500 Subject: [PATCH 096/201] less cheesy list collision fix --- src/allmydata/scripts/cli.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 310cb20fc..c00917022 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,5 +1,4 @@ from __future__ import print_function -import __builtin__ import os.path, re, fnmatch from twisted.python import usage @@ -219,7 +218,7 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = __builtin__.list(unicode(a, "utf-8") for a in args[:-1]) + self.sources = list(unicode(a, "utf-8") for a in args[:-1]) self.destination = unicode(args[-1], "utf-8") synopsis = "[options] FROM.. 
TO" @@ -430,7 +429,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -447,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = __builtin__.list(unicode(a, "utf-8") for a in locations) + self.locations = list(unicode(a, "utf-8") for a in locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -496,7 +495,7 @@ def list_aliases(options): rc = tahoe_add_alias.list_aliases(options) return rc -def list(options): +def list_(options): from allmydata.scripts import tahoe_ls rc = tahoe_ls.list(options) return rc @@ -582,7 +581,7 @@ dispatch = { "add-alias": add_alias, "create-alias": create_alias, "list-aliases": list_aliases, - "ls": list, + "ls": list_, "get": get, "put": put, "cp": cp, From de9bcc7ea85e0e0ab7da11aabd4b2d6a4ecdf07e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:21:20 -0500 Subject: [PATCH 097/201] encode Popen argv as UTF-8 on POSIX so we ignore locale --- src/allmydata/test/test_runner.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index ad03bd391..4054dc289 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -76,6 +76,11 @@ def run_bintahoe(extra_argv, python_options=None): argv.extend(python_options) argv.extend([u"-m", u"allmydata.scripts.runner"]) argv.extend(extra_argv) + if not platform.isWindows(): + # On POSIX Popen (via execvp) will encode argv using the "filesystem" + # encoding. Depending on LANG this may make our unicode arguments + # unencodable. Do our own UTF-8 encoding here instead. 
+ argv = list(arg.encode("utf-8") for arg in argv) p = Popen(argv, stdout=PIPE, stderr=PIPE) out = p.stdout.read().decode("utf-8") err = p.stderr.read().decode("utf-8") From 3d02545006ed08187be9cb9ad6902d1b6b543aa9 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:29:15 -0500 Subject: [PATCH 098/201] Remove tests based on locale behavior We don't like locale behavior --- src/allmydata/test/test_encodingutil.py | 65 +------------------------ 1 file changed, 1 insertion(+), 64 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 5f6700cd6..d49abafb3 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -70,7 +70,7 @@ if __name__ == "__main__": sys.exit(0) -import os, sys, locale +import os, sys from unittest import skipIf from twisted.trial import unittest @@ -93,69 +93,6 @@ from twisted.python import usage class MockStdout(object): pass -class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): - def test_get_io_encoding(self): - mock_stdout = MockStdout() - self.patch(sys, 'stdout', mock_stdout) - - mock_stdout.encoding = 'UTF-8' - _reload() - self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') - - mock_stdout.encoding = 'cp65001' - _reload() - self.assertEqual(get_io_encoding(), 'utf-8') - - mock_stdout.encoding = 'koi8-r' - expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' - _reload() - self.failUnlessReallyEqual(get_io_encoding(), expected) - - mock_stdout.encoding = 'nonexistent_encoding' - if sys.platform == "win32": - _reload() - self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') - else: - self.failUnlessRaises(AssertionError, _reload) - - def test_get_io_encoding_not_from_stdout(self): - preferredencoding = 'koi8-r' - def call_locale_getpreferredencoding(): - return preferredencoding - self.patch(locale, 'getpreferredencoding', call_locale_getpreferredencoding) - mock_stdout = MockStdout() - self.patch(sys, 'stdout', mock_stdout) - - expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' - _reload() - self.failUnlessReallyEqual(get_io_encoding(), expected) - - mock_stdout.encoding = None - _reload() - self.failUnlessReallyEqual(get_io_encoding(), expected) - - preferredencoding = None - _reload() - self.assertEqual(get_io_encoding(), 'utf-8') - - @skipIf(PY3, "Python 2 only.") - def test_unicode_to_output(self): - encodingutil.io_encoding = 'koi8-r' - self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) - - def test_no_unicode_normalization(self): - # Pretend to run on a Unicode platform. - # listdir_unicode normalized to NFC in 1.7beta, but now doesn't. - - def call_os_listdir(path): - return [Artonwall_nfd] - self.patch(os, 'listdir', call_os_listdir) - self.patch(sys, 'platform', 'darwin') - - _reload() - self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd]) - - # The following tests apply only to platforms that don't store filenames as # Unicode entities on the filesystem. 
class EncodingUtilNonUnicodePlatform(unittest.TestCase): From 23c34004a74ed9e95e5d25f04d3410286e5a1cac Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:29:49 -0500 Subject: [PATCH 099/201] Get rid of tests for bad io_encoding values We don't like bad io_encoding values --- src/allmydata/test/test_encodingutil.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index d49abafb3..a3c92d41c 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -84,11 +84,9 @@ from allmydata.util import encodingutil, fileutil from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ - get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ + get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from -from twisted.python import usage - class MockStdout(object): pass @@ -371,13 +369,6 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): check(u"\n", u"\"\\x0a\"", quote_newlines=True) def test_quote_output_default(self): - self.patch(encodingutil, 'io_encoding', 'ascii') - self.test_quote_output_ascii(None) - - self.patch(encodingutil, 'io_encoding', 'latin1') - self.test_quote_output_latin1(None) - - self.patch(encodingutil, 'io_encoding', 'utf-8') self.test_quote_output_utf8(None) From 60a44b99e69e33395daf48c46dfb0d4dc1ea3981 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:30:15 -0500 Subject: [PATCH 100/201] improve fixtures --- src/allmydata/test/test_encodingutil.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index a3c92d41c..d9d6cfeed 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -97,12 +97,8 @@ class EncodingUtilNonUnicodePlatform(unittest.TestCase): @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") def setUp(self): # Mock sys.platform because unicode_platform() uses it - self.original_platform = sys.platform - sys.platform = 'linux' - - def tearDown(self): - sys.platform = self.original_platform - _reload() + self.patch(sys, "platform", "linux") + self.addCleanup(_reload) def test_listdir_unicode(self): # What happens if latin1-encoded filenames are encountered on an UTF-8 @@ -135,12 +131,8 @@ class EncodingUtilNonUnicodePlatform(unittest.TestCase): class EncodingUtil(ReallyEqualMixin): def setUp(self): - self.original_platform = sys.platform - sys.platform = self.platform - - def tearDown(self): - sys.platform = self.original_platform - _reload() + self.patch(sys, "platform", self.platform) + self.addCleanup(_reload) def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") From 70d2fd66729789e548903618c566aebca82f6105 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:31:22 -0500 Subject: [PATCH 101/201] Don't have a Latin-1 io_encoding It's bad --- src/allmydata/test/test_encodingutil.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index d9d6cfeed..992ebd690 100644 --- 
a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -472,14 +472,6 @@ class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase): io_encoding = 'UTF-8' dirlist = [b'test_file', b'\xc3\x84rtonwall.mp3', b'Blah blah.txt'] -class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase): - uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' - argv = b'lumi\xe8re' - platform = 'linux2' - filesystem_encoding = 'ISO-8859-1' - io_encoding = 'ISO-8859-1' - dirlist = [b'test_file', b'Blah blah.txt', b'\xc4rtonwall.mp3'] - class Windows(EncodingUtil, unittest.TestCase): uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD' argv = b'lumi\xc3\xa8re' From 1810f4e99b5d06869bef8d050d4614f93ed4a2f4 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Mon, 11 Jan 2021 19:31:41 -0500 Subject: [PATCH 102/201] Force the encoding to utf-8 more often --- src/allmydata/util/encodingutil.py | 38 +++++++----------------------- 1 file changed, 8 insertions(+), 30 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 5cc3b8d19..289874213 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -44,41 +44,19 @@ def canonical_encoding(encoding): return encoding -def check_encoding(encoding): - # sometimes Python returns an encoding name that it doesn't support for conversion - # fail early if this happens - try: - u"test".encode(encoding) - except (LookupError, AttributeError): - raise AssertionError("The character encoding '%s' is not supported for conversion." % (encoding,)) -filesystem_encoding = None -io_encoding = None +# On Windows we install UTF-8 stream wrappers for sys.stdout and +# sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). +# +# On POSIX, we are moving towards a UTF-8-everything and ignore the locale. +io_encoding = "utf-8" + is_unicode_platform = False use_unicode_filepath = False +filesystem_encoding = "mbcs" if sys.platform == "win32" else "utf-8" def _reload(): - global filesystem_encoding, io_encoding, is_unicode_platform, use_unicode_filepath - - filesystem_encoding = canonical_encoding(sys.getfilesystemencoding()) - check_encoding(filesystem_encoding) - - if sys.platform == 'win32': - # On Windows we install UTF-8 stream wrappers for sys.stdout and - # sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). 
-        io_encoding = 'utf-8'
-    else:
-        ioenc = None
-        if hasattr(sys.stdout, 'encoding'):
-            ioenc = sys.stdout.encoding
-        if ioenc is None:
-            try:
-                ioenc = locale.getpreferredencoding()
-            except Exception:
-                pass # work around
-        io_encoding = canonical_encoding(ioenc)
-
-    check_encoding(io_encoding)
+    global is_unicode_platform, use_unicode_filepath

     is_unicode_platform = PY3 or sys.platform in ["win32", "darwin"]

From 15c46924ce8200ed88f45cb20116417f794c60d5 Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Tue, 12 Jan 2021 09:27:20 -0500
Subject: [PATCH 103/201] unused import

---
 src/allmydata/util/encodingutil.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py
index 289874213..35bf26e0c 100644
--- a/src/allmydata/util/encodingutil.py
+++ b/src/allmydata/util/encodingutil.py
@@ -19,7 +19,7 @@ if PY2:
     from past.builtins import unicode

-import sys, os, re, locale
+import sys, os, re
 import unicodedata
 import warnings

From 2889922a080771e0a1bb2dd28959929773df5eab Mon Sep 17 00:00:00 2001
From: Jean-Paul Calderone
Date: Tue, 12 Jan 2021 09:27:23 -0500
Subject: [PATCH 104/201] rein in scope - don't mess with filesystem encoding here

It is a separate can of worms from argv
---
 src/allmydata/util/encodingutil.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py
index 35bf26e0c..32049b57f 100644
--- a/src/allmydata/util/encodingutil.py
+++ b/src/allmydata/util/encodingutil.py
@@ -32,6 +32,16 @@ from allmydata.util.fileutil import abspath_expanduser_unicode

 NoneType = type(None)

+def check_encoding(encoding):
+    # sometimes Python returns an encoding name that it doesn't support for conversion
+    # fail early if this happens
+    try:
+        u"test".encode(encoding)
+    except (LookupError, AttributeError):
+        raise AssertionError(
+            "The character encoding '%s' is not supported for conversion."
% (encoding,), + ) + def canonical_encoding(encoding): if encoding is None: log.msg("Warning: falling back to UTF-8 encoding.", level=log.WEIRD) @@ -53,11 +63,12 @@ io_encoding = "utf-8" is_unicode_platform = False use_unicode_filepath = False -filesystem_encoding = "mbcs" if sys.platform == "win32" else "utf-8" +filesystem_encoding = None def _reload(): - global is_unicode_platform, use_unicode_filepath, filesystem_encoding + global filesystem_encoding, is_unicode_platform, use_unicode_filepath + filesystem_encoding = canonical_encoding(sys.getfilesystemencoding()) is_unicode_platform = PY3 or sys.platform in ["win32", "darwin"] # Despite the Unicode-mode FilePath support added to Twisted in From a9a60857b2c5ee2a811eda6562e6e3c31c0b727c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:28:50 -0500 Subject: [PATCH 105/201] attempt to reduce diff noise --- src/allmydata/util/encodingutil.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 32049b57f..168f40a58 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -32,16 +32,6 @@ from allmydata.util.fileutil import abspath_expanduser_unicode NoneType = type(None) -def check_encoding(encoding): - # sometimes Python returns an encoding name that it doesn't support for conversion - # fail early if this happens - try: - u"test".encode(encoding) - except (LookupError, AttributeError): - raise AssertionError( - "The character encoding '%s' is not supported for conversion." % (encoding,), - ) - def canonical_encoding(encoding): if encoding is None: log.msg("Warning: falling back to UTF-8 encoding.", level=log.WEIRD) @@ -54,6 +44,15 @@ def canonical_encoding(encoding): return encoding +def check_encoding(encoding): + # sometimes Python returns an encoding name that it doesn't support for conversion + # fail early if this happens + try: + u"test".encode(encoding) + except (LookupError, AttributeError): + raise AssertionError( + "The character encoding '%s' is not supported for conversion." % (encoding,), + ) # On Windows we install UTF-8 stream wrappers for sys.stdout and # sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). From 7c0d2e3cd5cd3cc780882b6c583050f2fbe49e4e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:29:24 -0500 Subject: [PATCH 106/201] another un-re-shuffling --- src/allmydata/util/encodingutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 168f40a58..1c884a88d 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -60,9 +60,9 @@ def check_encoding(encoding): # On POSIX, we are moving towards a UTF-8-everything and ignore the locale. 
io_encoding = "utf-8" +filesystem_encoding = None is_unicode_platform = False use_unicode_filepath = False -filesystem_encoding = None def _reload(): global filesystem_encoding, is_unicode_platform, use_unicode_filepath From ae1a0c591bd5d3b1d2f69604a16dd77c568d863b Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 09:58:34 -0500 Subject: [PATCH 107/201] Prefer to fix unicode_to_argv/argv_to_unicode instead of callers --- src/allmydata/scripts/cli.py | 42 +++++++++++++-------------- src/allmydata/scripts/create_node.py | 6 ++-- src/allmydata/test/cli/common.py | 5 ++-- src/allmydata/test/cli/test_backup.py | 5 ++-- src/allmydata/test/cli/test_put.py | 6 ++-- src/allmydata/test/common_util.py | 2 +- src/allmydata/test/test_runner.py | 11 +++---- src/allmydata/test/test_system.py | 4 +-- src/allmydata/util/encodingutil.py | 27 +++++++++++++++++ 9 files changed, 64 insertions(+), 44 deletions(-) diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index c00917022..eeae20fe1 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -4,7 +4,7 @@ import os.path, re, fnmatch from twisted.python import usage from allmydata.scripts.common import get_aliases, get_default_nodedir, \ DEFAULT_ALIAS, BaseOptions -from allmydata.util.encodingutil import argv_to_abspath, quote_local_unicode_path +from allmydata.util.encodingutil import argv_to_unicode, argv_to_abspath, quote_local_unicode_path from .tahoe_status import TahoeStatusCommand NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") @@ -55,7 +55,7 @@ class MakeDirectoryOptions(FileStoreOptions): ] def parseArgs(self, where=""): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) if self['format']: if self['format'].upper() not in ("SDMF", "MDMF"): @@ -66,7 +66,7 @@ class MakeDirectoryOptions(FileStoreOptions): class AddAliasOptions(FileStoreOptions): def parseArgs(self, alias, cap): - self.alias = unicode(alias, "utf-8") + self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] self.cap = cap @@ -76,7 +76,7 @@ class AddAliasOptions(FileStoreOptions): class CreateAliasOptions(FileStoreOptions): def parseArgs(self, alias): - self.alias = unicode(alias, "utf-8") + self.alias = argv_to_unicode(alias) if self.alias.endswith(u':'): self.alias = self.alias[:-1] @@ -100,7 +100,7 @@ class ListOptions(FileStoreOptions): ("json", None, "Show the raw JSON output."), ] def parseArgs(self, where=""): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [PATH]" @@ -142,7 +142,7 @@ class GetOptions(FileStoreOptions): if arg2 == "-": arg2 = None - self.from_file = unicode(arg1, "utf-8") + self.from_file = argv_to_unicode(arg1) self.to_file = None if arg2 is None else argv_to_abspath(arg2) synopsis = "[options] REMOTE_FILE LOCAL_FILE" @@ -175,7 +175,7 @@ class PutOptions(FileStoreOptions): arg1 = None self.from_file = None if arg1 is None else argv_to_abspath(arg1) - self.to_file = None if arg2 is None else unicode(arg2, "utf-8") + self.to_file = None if arg2 is None else argv_to_unicode(arg2) if self['format']: if self['format'].upper() not in ("SDMF", "MDMF", "CHK"): @@ -218,8 +218,8 @@ class CpOptions(FileStoreOptions): def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = list(unicode(a, "utf-8") for a in args[:-1]) - self.destination = unicode(args[-1], "utf-8") + self.sources = map(argv_to_unicode, args[:-1]) + 
self.destination = argv_to_unicode(args[-1]) synopsis = "[options] FROM.. TO" @@ -255,15 +255,15 @@ class CpOptions(FileStoreOptions): class UnlinkOptions(FileStoreOptions): def parseArgs(self, where): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] REMOTE_FILE" description = "Remove a named file from its parent directory." class MvOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = unicode(frompath, "utf-8") - self.to_file = unicode(topath, "utf-8") + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) synopsis = "[options] FROM TO" @@ -281,8 +281,8 @@ class MvOptions(FileStoreOptions): class LnOptions(FileStoreOptions): def parseArgs(self, frompath, topath): - self.from_file = unicode(frompath, "utf-8") - self.to_file = unicode(topath, "utf-8") + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) synopsis = "[options] FROM_LINK TO_LINK" @@ -328,14 +328,14 @@ class BackupOptions(FileStoreOptions): def parseArgs(self, localdir, topath): self.from_dir = argv_to_abspath(localdir) - self.to_dir = unicode(topath, "utf-8") + self.to_dir = argv_to_unicode(topath) synopsis = "[options] FROM ALIAS:TO" def opt_exclude(self, pattern): """Ignore files matching a glob pattern. You may give multiple '--exclude' options.""" - g = unicode(pattern, "utf-8").strip() + g = argv_to_unicode(pattern).strip() if g: exclude = self['exclude'] exclude.add(g) @@ -385,7 +385,7 @@ class WebopenOptions(FileStoreOptions): ("info", "i", "Open the t=info page for the file"), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" @@ -402,7 +402,7 @@ class ManifestOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed."), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -414,7 +414,7 @@ class StatsOptions(FileStoreOptions): ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = unicode(where, "utf-8") + self.where = argv_to_unicode(where) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -429,7 +429,7 @@ class CheckOptions(FileStoreOptions): ("add-lease", None, "Add/renew lease on all shares."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = map(argv_to_unicode, locations) synopsis = "[options] [ALIAS:PATH]" description = """ @@ -446,7 +446,7 @@ class DeepCheckOptions(FileStoreOptions): ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, *locations): - self.locations = list(unicode(a, "utf-8") for a in locations) + self.locations = map(argv_to_unicode, locations) synopsis = "[options] [ALIAS:PATH]" description = """ diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py index ed4f0c71d..ac17cf445 100644 --- a/src/allmydata/scripts/create_node.py +++ b/src/allmydata/scripts/create_node.py @@ -16,7 +16,7 @@ from allmydata.scripts.common import ( ) from allmydata.scripts.default_nodedir import _default_nodedir from allmydata.util.assertutil import precondition -from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path, get_io_encoding +from allmydata.util.encodingutil import listdir_unicode, argv_to_unicode, quote_local_unicode_path, 
get_io_encoding from allmydata.util import fileutil, i2p_provider, iputil, tor_provider from wormhole import wormhole @@ -238,7 +238,7 @@ def write_node_config(c, config): c.write("\n") c.write("[node]\n") - nickname = unicode(config.get("nickname") or "", "utf-8") + nickname = argv_to_unicode(config.get("nickname") or "") c.write("nickname = %s\n" % (nickname.encode('utf-8'),)) if config["hide-ip"]: c.write("reveal-IP-address = false\n") @@ -246,7 +246,7 @@ def write_node_config(c, config): c.write("reveal-IP-address = true\n") # TODO: validate webport - webport = unicode(config.get("webport") or "none", "utf-8") + webport = argv_to_unicode(config.get("webport") or "none") if webport.lower() == "none": webport = "" c.write("web.port = %s\n" % (webport.encode('utf-8'),)) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index 13445ef0a..bf175de44 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,5 +1,4 @@ -from six import ensure_str - +from ...util.encodingutil import unicode_to_argv from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -46,6 +45,6 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = ensure_str(self.get_clientdir(i=client_num)) + client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_backup.py b/src/allmydata/test/cli/test_backup.py index 6aecd0af6..ceecbd662 100644 --- a/src/allmydata/test/cli/test_backup.py +++ b/src/allmydata/test/cli/test_backup.py @@ -1,5 +1,4 @@ import os.path -from six import ensure_str from six.moves import cStringIO as StringIO from datetime import timedelta import re @@ -10,7 +9,7 @@ from twisted.python.monkey import MonkeyPatcher import __builtin__ from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import get_io_encoding +from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv from allmydata.util.namespace import Namespace from allmydata.scripts import cli, backupdb from ..common_util import StallMixin @@ -414,7 +413,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): return StringIO() patcher = MonkeyPatcher((__builtin__, 'file', call_file)) - patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', ensure_str(exclude_file), 'from', 'to']) + patcher.runWithPatches(parse_options, basedir, "backup", ['--exclude-from', unicode_to_argv(exclude_file), 'from', 'to']) self.failUnless(ns.called) def test_ignore_symlinks(self): diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 2deafb784..31eb671bb 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,7 +1,5 @@ import os.path -from six import ensure_str - from twisted.trial import unittest from twisted.python import usage @@ -10,7 +8,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding +from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv from allmydata.util.fileutil import 
abspath_expanduser_unicode from .common import CLITestMixin @@ -50,7 +48,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid(oneshare=True) rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = ensure_str(abspath_expanduser_unicode(unicode(rel_fn))) + abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") d = self.do_cli("put", rel_fn) diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index 7b3194d3f..2a70cff3a 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -76,7 +76,7 @@ def run_cli_native(verb, *args, **kwargs): encoding = kwargs.pop("encoding", None) precondition( all(isinstance(arg, native_str) for arg in [verb] + nodeargs + list(args)), - "arguments to run_cli must be a native string -- convert using UTF-8", + "arguments to run_cli must be a native string -- convert using unicode_to_argv", verb=verb, args=args, nodeargs=nodeargs, diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 4054dc289..2f0ac0cbe 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -23,6 +23,7 @@ from twisted.python.runtime import ( platform, ) from allmydata.util import fileutil, pollmixin +from allmydata.util.encodingutil import unicode_to_argv, get_filesystem_encoding from allmydata.test import common_util import allmydata from .common import ( @@ -71,16 +72,12 @@ def run_bintahoe(extra_argv, python_options=None): :return: A three-tuple of stdout (unicode), stderr (unicode), and the child process "returncode" (int). """ - argv = [sys.executable] + argv = [sys.executable.decode(get_filesystem_encoding())] if python_options is not None: argv.extend(python_options) argv.extend([u"-m", u"allmydata.scripts.runner"]) argv.extend(extra_argv) - if not platform.isWindows(): - # On POSIX Popen (via execvp) will encode argv using the "filesystem" - # encoding. Depending on LANG this may make our unicode arguments - # unencodable. Do our own UTF-8 encoding here instead. - argv = list(arg.encode("utf-8") for arg in argv) + argv = list(unicode_to_argv(arg) for arg in argv) p = Popen(argv, stdout=PIPE, stderr=PIPE) out = p.stdout.read().decode("utf-8") err = p.stderr.read().decode("utf-8") @@ -109,7 +106,7 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): # -t is a harmless option that warns about tabs so we can add it # -without impacting other behavior noticably. 
- out, err, returncode = run_bintahoe(["--version"], python_options=["-t"]) + out, err, returncode = run_bintahoe([u"--version"], python_options=[u"-t"]) self.assertEqual(returncode, 0) self.assertTrue(out.startswith(allmydata.__appname__ + '/')) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 03b9ba2de..75219004b 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -35,7 +35,7 @@ from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.util import idlib, mathutil, pollmixin, fileutil from allmydata.util import log, base32 -from allmydata.util.encodingutil import quote_output +from allmydata.util.encodingutil import quote_output, unicode_to_argv from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data from allmydata.interfaces import IDirectoryNode, IFileNode, \ @@ -2185,7 +2185,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): log.msg("test_system.SystemTest._test_runner using %r" % filename) rc,output,err = yield run_cli("debug", "dump-share", "--offsets", - ensure_str(filename)) + unicode_to_argv(filename)) self.failUnlessEqual(rc, 0) # we only upload a single file, so we can assert some things about diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 1c884a88d..c5a8639e8 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -18,6 +18,7 @@ if PY2: from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 from past.builtins import unicode +from six import ensure_str import sys, os, re import unicodedata @@ -106,6 +107,32 @@ def argv_to_abspath(s, **kwargs): % (quote_output(s), quote_output(os.path.join('.', s)))) return abspath_expanduser_unicode(decoded, **kwargs) + +def unicode_to_argv(s, mangle=False): + """ + Make the given unicode string suitable for use in an argv list. + + On Python 2, this encodes using UTF-8. On Python 3, this returns the + input unmodified. + """ + precondition(isinstance(s, unicode), s) + return ensure_str(s) + + +def argv_to_unicode(s): + """ + Perform the inverse of ``unicode_to_argv``. + """ + if isinstance(s, unicode): + return s + precondition(isinstance(s, bytes), s) + + try: + return unicode(s, io_encoding) + except UnicodeDecodeError: + raise usage.UsageError("Argument %s cannot be decoded as %s." % + (quote_output(s), io_encoding)) + def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes. 
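For readers following the series, the behaviour PATCH 107 centralizes amounts to a decode/encode round trip between argv elements and text. The following standalone Python 3 sketch is illustrative only: the hard-coded UTF-8 ``io_encoding``, the simplified function bodies, and the use of ``ValueError`` in place of Twisted's ``usage.UsageError`` are assumptions made for brevity rather than the real helpers in ``allmydata.util.encodingutil``, which also assert preconditions and are adjusted again in patches 118 and 120::

    io_encoding = "utf-8"  # stand-in for the encoding encodingutil detects at import time

    def argv_to_unicode(s):
        """Decode one argv element to text; reject anything undecodable."""
        if isinstance(s, str):
            return s
        try:
            return s.decode(io_encoding)
        except UnicodeDecodeError:
            raise ValueError("argument %r cannot be decoded as %s" % (s, io_encoding))

    def unicode_to_argv(s):
        """Inverse direction; on Python 3 (and on Windows) this is the identity."""
        return s

    assert argv_to_unicode(b"lumi\xc3\xa8re") == u"lumi\u00e8re"  # bytes decode via io_encoding
    assert argv_to_unicode(u"lumi\u00e8re") == u"lumi\u00e8re"    # text passes straight through
    assert unicode_to_argv(u"lumi\u00e8re") == u"lumi\u00e8re"

This decode-on-the-way-in rule is what the CLI option classes in the patch above now rely on, instead of each call site doing its own ``unicode(arg, "utf-8")`` conversion.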
From 3dadd47416cc3338e7b17e035c7c8b9fcc179f8e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:11:20 -0500 Subject: [PATCH 108/201] unused import --- src/allmydata/windows/fixups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index d34404aed..b4204b5d3 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -1,6 +1,6 @@ from __future__ import print_function -import codecs, re +import codecs from functools import partial from ctypes import WINFUNCTYPE, windll, POINTER, c_int, WinError, byref, get_last_error From 8f498437cf22976f7033be7eed4731478b4baa7c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:11:23 -0500 Subject: [PATCH 109/201] whitespace --- src/allmydata/test/cli/test_put.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 31eb671bb..08a66f98d 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -1,5 +1,4 @@ import os.path - from twisted.trial import unittest from twisted.python import usage From db31d2bc1a85fa454aff57de8f390bed48826485 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:14:38 -0500 Subject: [PATCH 110/201] news fragment --- newsfragments/3588.incompat | 1 + 1 file changed, 1 insertion(+) create mode 100644 newsfragments/3588.incompat diff --git a/newsfragments/3588.incompat b/newsfragments/3588.incompat new file mode 100644 index 000000000..402ae8479 --- /dev/null +++ b/newsfragments/3588.incompat @@ -0,0 +1 @@ +The Tahoe command line now always uses UTF-8 to decode its arguments, regardless of locale. From 82d24bfaf7662c989816765e52d2b3fe962762dc Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 10:46:04 -0500 Subject: [PATCH 111/201] one more --- src/allmydata/util/encodingutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index c5a8639e8..48d5cc7b4 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -101,7 +101,7 @@ def argv_to_abspath(s, **kwargs): Convenience function to decode an argv element to an absolute path, with ~ expanded. If this fails, raise a UsageError. """ - decoded = unicode(s, "utf-8") + decoded = argv_to_unicode(s) if decoded.startswith(u'-'): raise usage.UsageError("Path argument %s cannot start with '-'.\nUse %s if you intended to refer to a file." % (quote_output(s), quote_output(os.path.join('.', s)))) From aa4f1130270191ed3b8992e370ba684b5d5d5136 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:11:43 -0500 Subject: [PATCH 112/201] Get the monkey patching right --- src/allmydata/test/test_encodingutil.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 992ebd690..da8ee8618 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -96,10 +96,14 @@ class MockStdout(object): class EncodingUtilNonUnicodePlatform(unittest.TestCase): @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") def setUp(self): - # Mock sys.platform because unicode_platform() uses it - self.patch(sys, "platform", "linux") + # Make sure everything goes back to the way it was at the end of the + # test. 
self.addCleanup(_reload) + # Mock sys.platform because unicode_platform() uses it. Cleanups run + # in reverse order so we do this second so it gets undone first. + self.patch(sys, "platform", "linux") + def test_listdir_unicode(self): # What happens if latin1-encoded filenames are encountered on an UTF-8 # filesystem? @@ -131,8 +135,8 @@ class EncodingUtilNonUnicodePlatform(unittest.TestCase): class EncodingUtil(ReallyEqualMixin): def setUp(self): - self.patch(sys, "platform", self.platform) self.addCleanup(_reload) + self.patch(sys, "platform", self.platform) def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") From 46d3ffb2e287217e2afb7977bf3e41e6521ddab1 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:20:50 -0500 Subject: [PATCH 113/201] diff shrink --- src/allmydata/util/encodingutil.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 48d5cc7b4..6d4cd3a8f 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -96,6 +96,20 @@ def get_io_encoding(): """ return io_encoding +def argv_to_unicode(s): + """ + Perform the inverse of ``unicode_to_argv``. + """ + if isinstance(s, unicode): + return s + precondition(isinstance(s, bytes), s) + + try: + return unicode(s, io_encoding) + except UnicodeDecodeError: + raise usage.UsageError("Argument %s cannot be decoded as %s." % + (quote_output(s), io_encoding)) + def argv_to_abspath(s, **kwargs): """ Convenience function to decode an argv element to an absolute path, with ~ expanded. @@ -119,20 +133,6 @@ def unicode_to_argv(s, mangle=False): return ensure_str(s) -def argv_to_unicode(s): - """ - Perform the inverse of ``unicode_to_argv``. - """ - if isinstance(s, unicode): - return s - precondition(isinstance(s, bytes), s) - - try: - return unicode(s, io_encoding) - except UnicodeDecodeError: - raise usage.UsageError("Argument %s cannot be decoded as %s." % - (quote_output(s), io_encoding)) - def unicode_to_url(s): """ Encode an unicode object used in an URL to bytes. From 99f00818a8eecec76717d977d051c1b0bdac5cb6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:21:32 -0500 Subject: [PATCH 114/201] diff shrink --- src/allmydata/util/encodingutil.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 6d4cd3a8f..679ad2055 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -98,10 +98,13 @@ def get_io_encoding(): def argv_to_unicode(s): """ - Perform the inverse of ``unicode_to_argv``. + Decode given argv element to unicode. If this fails, raise a UsageError. + + This is the inverse of ``unicode_to_argv``. 
""" if isinstance(s, unicode): return s + precondition(isinstance(s, bytes), s) try: From 7ca3c86a3501b7ad7c702cd76c2d81d9cd9328c0 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:35:03 -0500 Subject: [PATCH 115/201] debug nonsense --- src/allmydata/scripts/runner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 1f993fda1..2a41d5cf5 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -119,7 +119,8 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=stdout) + for f in stdout, open("debug.txt", "wt"): + print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=f) sys.exit(1) return config From ec92f0362d178fd125daaeb35e386502c5712eb2 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:36:42 -0500 Subject: [PATCH 116/201] this? --- src/allmydata/scripts/runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 2a41d5cf5..5af4083cb 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -119,7 +119,7 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - for f in stdout, open("debug.txt", "wt"): + for f in stdout, open("debug.txt", "wb"): print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=f) sys.exit(1) return config From 183ee10035cf59e427bee91ced6a66f9fb2a276e Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:39:56 -0500 Subject: [PATCH 117/201] probably more useful debug info --- src/allmydata/scripts/runner.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 5af4083cb..ee0811ea5 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -107,6 +107,8 @@ def parse_options(argv, config=None): return config def parse_or_exit_with_explanation(argv, stdout=sys.stdout): + with open("argv-debug.txt", "wt") as f: + print(repr(argv), file=f) config = Options() try: parse_options(argv, config=config) @@ -119,8 +121,7 @@ def parse_or_exit_with_explanation(argv, stdout=sys.stdout): msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) - for f in stdout, open("debug.txt", "wb"): - print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=f) + print("%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)), file=stdout) sys.exit(1) return config From e3a805caa724f5f3c9c9948c95bd71f995712dff Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:44:00 -0500 Subject: [PATCH 118/201] unicode_to_argv == id on win32 --- src/allmydata/util/encodingutil.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 679ad2055..20fecf4a1 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -129,10 +129,12 @@ def unicode_to_argv(s, mangle=False): """ Make the given unicode string suitable for use in an argv list. - On Python 2, this encodes using UTF-8. 
On Python 3, this returns the - input unmodified. + On Python 2 on POSIX, this encodes using UTF-8. On Python 3 and on + Windows, this returns the input unmodified. """ precondition(isinstance(s, unicode), s) + if sys.platform == "win32": + return s return ensure_str(s) From 622d67c9b937ed9ea8605c0ddd6149b255067726 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:55:40 -0500 Subject: [PATCH 119/201] done with this, thanks --- src/allmydata/scripts/runner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index ee0811ea5..1f993fda1 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -107,8 +107,6 @@ def parse_options(argv, config=None): return config def parse_or_exit_with_explanation(argv, stdout=sys.stdout): - with open("argv-debug.txt", "wt") as f: - print(repr(argv), file=f) config = Options() try: parse_options(argv, config=config) From 522f96b150cb11d0f6ddf0c00c42744d262b01b6 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:56:37 -0500 Subject: [PATCH 120/201] may as well leave(/restore) this --- src/allmydata/util/encodingutil.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 20fecf4a1..28458c9dc 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -133,6 +133,9 @@ def unicode_to_argv(s, mangle=False): Windows, this returns the input unmodified. """ precondition(isinstance(s, unicode), s) + if PY3: + warnings.warn("This will be unnecessary once Python 2 is dropped.", + DeprecationWarning) if sys.platform == "win32": return s return ensure_str(s) From 5a145e74ef59c02cf72c36efb90b180eb49c913c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 14:59:16 -0500 Subject: [PATCH 121/201] a mild warning/suggestion here --- src/allmydata/test/test_system.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 75219004b..bf115f127 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -73,6 +73,9 @@ from ..scripts.common import ( class RunBinTahoeMixin(object): def run_bintahoe(self, args, stdin=None, python_options=[], env=None): + # test_runner.run_bintahoe has better unicode support but doesn't + # support env yet and is also synchronous. If we could get rid of + # this in favor of that, though, it would probably be an improvement. command = sys.executable argv = python_options + ["-m", "allmydata.scripts.runner"] + args From 44d76cb159b4d75312c1bcd22cd0bfa9010e620d Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:00:02 -0500 Subject: [PATCH 122/201] fix formatting mistake --- src/allmydata/test/test_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index 2f0ac0cbe..cf56e8baa 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -105,7 +105,7 @@ class BinTahoe(common_util.SignalMixin, unittest.TestCase): # we have to have our own implementation of skipping these options. # -t is a harmless option that warns about tabs so we can add it - # -without impacting other behavior noticably. + # without impacting other behavior noticably. 
out, err, returncode = run_bintahoe([u"--version"], python_options=[u"-t"]) self.assertEqual(returncode, 0) self.assertTrue(out.startswith(allmydata.__appname__ + '/')) From 9c63703efc23161bda63f863963dc4f5f75cdc64 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:15:42 -0500 Subject: [PATCH 123/201] no effort being made to support these locales --- src/allmydata/test/test_encodingutil.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index da8ee8618..06340496b 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -492,20 +492,6 @@ class MacOSXLeopard(EncodingUtil, unittest.TestCase): io_encoding = 'UTF-8' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] -class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase): - uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' - platform = 'darwin' - filesystem_encoding = 'utf-8' - io_encoding = 'US-ASCII' - dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] - -class OpenBSD(EncodingUtil, unittest.TestCase): - uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)' - platform = 'openbsd4' - filesystem_encoding = '646' - io_encoding = '646' - # Oops, I cannot write filenames containing non-ascii characters - class TestToFromStr(ReallyEqualMixin, unittest.TestCase): def test_to_bytes(self): From 6c430bd4e60a0e2f42a66ecf024edf0ae5b430de Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:16:18 -0500 Subject: [PATCH 124/201] re-add a direct unicode_to_argv test harder to express the conditional in skips so the two tests become one --- src/allmydata/test/test_encodingutil.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 06340496b..f7987d466 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -85,8 +85,8 @@ from allmydata.util.encodingutil import unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ - to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from - + to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ + unicode_to_argv class MockStdout(object): pass @@ -157,6 +157,20 @@ class EncodingUtil(ReallyEqualMixin): def test_unicode_to_output_py3(self): self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) + def test_unicode_to_argv(self): + """ + unicode_to_argv() returns its unicode argument on Windows and Python 2 and + converts to bytes using UTF-8 elsewhere. 
+ """ + result = unicode_to_argv(lumiere_nfc) + if PY3 or self.platform == "win32": + expected_value = lumiere_nfc + else: + expected_value = lumiere_nfc.encode(self.io_encoding) + + self.assertIsInstance(result, type(expected_value)) + self.assertEqual(result, expected_value) + @skipIf(PY3, "Python 3 only.") def test_unicode_platform_py2(self): matrix = { From 6984f2be3ffc709aff663e69291d3ed998dd2599 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 15:58:07 -0500 Subject: [PATCH 125/201] Try to get the Python 2 / Windows case working --- src/allmydata/test/cli/common.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index bf175de44..f1c48d1af 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,4 +1,5 @@ -from ...util.encodingutil import unicode_to_argv +from six import ensure_str + from ...scripts import runner from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode @@ -45,6 +46,12 @@ class CLITestMixin(ReallyEqualMixin): # client_num is used to execute client CLI commands on a specific # client. client_num = kwargs.pop("client_num", 0) - client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) + # If we were really going to launch a child process then + # `unicode_to_argv` would be the right thing to do here. However, + # we're just going to call some Python functions directly and those + # Python functions want native strings. So ignore the requirements + # for passing arguments to another process and make sure this argument + # is a native string. + client_dir = ensure_str(self.get_clientdir(i=client_num)) nodeargs = [ b"--node-directory", client_dir ] return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) From 43dc85501f7ba57f38c33eb73a5346e5cf670e44 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 16:03:28 -0500 Subject: [PATCH 126/201] is this api less troublesome? 
--- src/allmydata/test/cli/test_put.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index 08a66f98d..fadc09c06 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -46,21 +46,21 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): self.basedir = "cli/Put/unlinked_immutable_from_file" self.set_up_grid(oneshare=True) - rel_fn = os.path.join(self.basedir, "DATAFILE") - abs_fn = unicode_to_argv(abspath_expanduser_unicode(unicode(rel_fn))) + rel_fn = unicode(os.path.join(self.basedir, "DATAFILE")) + abs_fn = abspath_expanduser_unicode(rel_fn) # we make the file small enough to fit in a LIT file, for speed fileutil.write(rel_fn, "short file") - d = self.do_cli("put", rel_fn) + d = self.do_cli_unicode(u"put", [rel_fn]) def _uploaded(args): (rc, out, err) = args readcap = out self.failUnless(readcap.startswith("URI:LIT:"), readcap) self.readcap = readcap d.addCallback(_uploaded) - d.addCallback(lambda res: self.do_cli("put", "./" + rel_fn)) + d.addCallback(lambda res: self.do_cli_unicode(u"put", [u"./" + rel_fn])) d.addCallback(lambda rc_stdout_stderr: self.failUnlessReallyEqual(rc_stdout_stderr[1], self.readcap)) - d.addCallback(lambda res: self.do_cli("put", abs_fn)) + d.addCallback(lambda res: self.do_cli_unicode(u"put", [abs_fn])) d.addCallback(lambda rc_stdout_stderr: self.failUnlessReallyEqual(rc_stdout_stderr[1], self.readcap)) # we just have to assume that ~ is handled properly From 216efb2aed019fe893b760dd4e40780da4a202e3 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 12 Jan 2021 16:52:43 -0500 Subject: [PATCH 127/201] unused import --- src/allmydata/test/cli/test_put.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/cli/test_put.py b/src/allmydata/test/cli/test_put.py index fadc09c06..3392e67b4 100644 --- a/src/allmydata/test/cli/test_put.py +++ b/src/allmydata/test/cli/test_put.py @@ -7,7 +7,7 @@ from allmydata.scripts.common import get_aliases from allmydata.scripts import cli from ..no_network import GridTestMixin from ..common_util import skip_if_cannot_represent_filename -from allmydata.util.encodingutil import get_io_encoding, unicode_to_argv +from allmydata.util.encodingutil import get_io_encoding from allmydata.util.fileutil import abspath_expanduser_unicode from .common import CLITestMixin From 512897eca0310ce6ddfe262520f3c21bcf345ccf Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 19 Jan 2021 13:46:32 -0500 Subject: [PATCH 128/201] news fragment --- newsfragments/3592.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3592.minor diff --git a/newsfragments/3592.minor b/newsfragments/3592.minor new file mode 100644 index 000000000..e69de29bb From 61d5f920bb9db703edb9f3aed73f2d0488ee0d2c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 19 Jan 2021 14:28:16 -0500 Subject: [PATCH 129/201] Add tests for the tag construction code and make it a bit safer Check for sane inputs, reject insane ones --- src/allmydata/test/test_hashutil.py | 36 +++++++++++++++++++++++++++++ src/allmydata/util/hashutil.py | 29 ++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_hashutil.py b/src/allmydata/test/test_hashutil.py index 6ec861c9f..6ac73ae60 100644 --- a/src/allmydata/test/test_hashutil.py +++ b/src/allmydata/test/test_hashutil.py @@ -126,6 +126,42 @@ class 
HashUtilTests(unittest.TestCase): base32.a2b(b"2ckv3dfzh6rgjis6ogfqhyxnzy"), ) + def test_convergence_hasher_tag(self): + """ + ``_convergence_hasher_tag`` constructs the convergence hasher tag from a + unique prefix, the required, total, and segment size parameters, and a + convergence secret. + """ + self.assertEqual( + "allmydata_immutable_content_to_key_with_added_secret_v1+" + "16:\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42," + "9:3,10,1024,", + hashutil._convergence_hasher_tag( + k=3, + n=10, + segsize=1024, + convergence=b"\x42" * 16, + ), + ) + + def test_convergence_hasher_out_of_bounds(self): + """ + ``_convergence_hasher_tag`` raises ``ValueError`` if k or n is not between + 1 and 256 inclusive or if k is greater than n. + """ + segsize = 1024 + secret = b"\x42" * 16 + for bad_k in (0, 2, 257): + with self.assertRaises(ValueError): + hashutil._convergence_hasher_tag( + k=bad_k, n=1, segsize=segsize, convergence=secret, + ) + for bad_n in (0, 1, 257): + with self.assertRaises(ValueError): + hashutil._convergence_hasher_tag( + k=2, n=bad_n, segsize=segsize, convergence=secret, + ) + def test_known_answers(self): """ Verify backwards compatibility by comparing hash outputs for some diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index ebb2f12af..f82c04efd 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -176,10 +176,37 @@ def convergence_hash(k, n, segsize, data, convergence): return h.digest() -def convergence_hasher(k, n, segsize, convergence): +def _convergence_hasher_tag(k, n, segsize, convergence): + """ + Create the convergence hashing tag. + + :param int k: Required shares. + :param int n: Total shares. + :param int segsize: Maximum segment size. + :param bytes convergence: The convergence secret. + + :return bytes: The bytestring to use as a tag in the convergence hash. + """ assert isinstance(convergence, bytes) + if k > n: + raise ValueError( + "k > n not allowed; k = {}, n = {}".format(k, n), + ) + if k < 1 or n < 1: + raise ValueError( + "k, n < 1 not allowed; k = {}, n = {}".format(k, n), + ) + if k > 256 or n > 256: + raise ValueError( + "k, n > 256 not allowed; k = {}, n = {}".format(k, n), + ) param_tag = netstring(b"%d,%d,%d" % (k, n, segsize)) tag = CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + param_tag + return tag + + +def convergence_hasher(k, n, segsize, convergence): + tag = _convergence_hasher_tag(k, n, segsize, convergence) return tagged_hasher(tag, KEYLEN) From 11e4bcf47680f20eb9015df069ed30fd8c71ee0c Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 19 Jan 2021 14:41:58 -0500 Subject: [PATCH 130/201] Add a direct unit test for FileHandle.get_encryption_key --- src/allmydata/test/test_upload.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index 94d7575c3..664e4cc94 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -14,6 +14,9 @@ if PY2: import os, shutil from io import BytesIO +from base64 import ( + b64encode, +) from twisted.trial import unittest from twisted.python.failure import Failure @@ -877,6 +880,34 @@ def is_happy_enough(servertoshnums, h, k): return True +class FileHandleTests(unittest.TestCase): + """ + Tests for ``FileHandle``. 
+ """ + def test_get_encryption_key_convergent(self): + """ + When ``FileHandle`` is initialized with a convergence secret, + ``FileHandle.get_encryption_key`` returns a deterministic result that + is a function of that secret. + """ + secret = b"\x42" * 16 + handle = upload.FileHandle(BytesIO(b"hello world"), secret) + handle.set_default_encoding_parameters({ + "k": 3, + "happy": 5, + "n": 10, + # Remember this is the *max* segment size. In reality, the data + # size is much smaller so the actual segment size incorporated + # into the encryption key is also smaller. + "max_segment_size": 128 * 1024, + }) + + self.assertEqual( + b64encode(self.successResultOf(handle.get_encryption_key())), + b"oBcuR/wKdCgCV2GKKXqiNg==", + ) + + class EncodingParameters(GridTestMixin, unittest.TestCase, SetDEPMixin, ShouldFailMixin): From be5cf1a0bea687d21ded82a2bfdc2c6301afae73 Mon Sep 17 00:00:00 2001 From: Jean-Paul Calderone Date: Tue, 19 Jan 2021 14:42:30 -0500 Subject: [PATCH 131/201] news fragment --- newsfragments/3593.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3593.minor diff --git a/newsfragments/3593.minor b/newsfragments/3593.minor new file mode 100644 index 000000000..e69de29bb From b0cb50b8973be87b0af3ba553f8cfb530c2b8b90 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 20 Dec 2020 19:09:34 -0700 Subject: [PATCH 132/201] write verification instructions, and developer statement --- docs/INSTALL.rst | 37 ++++++++++++++++++++++++++++++- docs/developer-release-signatures | 25 +++++++++++++++++++++ 2 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 docs/developer-release-signatures diff --git a/docs/INSTALL.rst b/docs/INSTALL.rst index 3a724b790..568869407 100644 --- a/docs/INSTALL.rst +++ b/docs/INSTALL.rst @@ -173,7 +173,9 @@ from PyPI with ``venv/bin/pip install tahoe-lafs``. After installation, run Install From a Source Tarball ----------------------------- -You can also install directly from the source tarball URL:: +You can also install directly from the source tarball URL. To verify +signatures, first see verifying_signatures_ and replace the URL in the +following instructions with the local filename. % virtualenv venv New python executable in ~/venv/bin/python2.7 @@ -189,6 +191,39 @@ You can also install directly from the source tarball URL:: tahoe-lafs: 1.14.0 ... +.. _verifying_signatures: + +Verifying Signatures +-------------------- + +First download the source tarball and then any signatures. There are several +developers who are expected to produce signatures for a release. *At least +two signatures should be verified*. + +This statement, signed by the existing Tahoe release-signing key, attests to +those developers authorized to sign a Tahoe release: + +.. include:: developer-release-signatures + :code: + +Signatures are made available beside the release. So for example, a release +like ``https://tahoe-lafs.org/downloads/tahoe-lafs-1.16.0.tar.bz2`` might +have signatures ``tahoe-lafs-1.16.0.tar.bz2.meejah.asc`` and +``tahoe-lafs-1.16.0.tar.bz2.warner.asc``. 
+ +To verify the signatures using GnuPG:: + + % gpg --verify tahoe-lafs-1.16.0.tar.bz2.meejah.asc tahoe-lafs-1.16.0.tar.bz2 + gpg: Signature made XXX + gpg: using RSA key 9D5A2BD5688ECB889DEBCD3FC2602803128069A7 + gpg: Good signature from "meejah " [full] + % gpg --verify tahoe-lafs-1.16.0.tar.bz2.warner.asc tahoe-lafs-1.16.0.tar.bz2 + gpg: Signature made XXX + gpg: using RSA key 967EFE06699872411A77DF36D43B4C9C73225AAF + gpg: Good signature from "Brian Warner " [full] + + + Extras ------ diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures new file mode 100644 index 000000000..d79d01fab --- /dev/null +++ b/docs/developer-release-signatures @@ -0,0 +1,25 @@ +TODO: clear-sign this with the release key + + +Any two of the following core Tahoe contributers may sign a +release. They each independantly produce a signature which are made +available beside Tahoe releases after 1.15.0 + +This statement is signed by the previous Tahoe release key. Any future +such statements may be signed by it OR by any two developers (for +example, to add or remove developers from the list). + +meejah +0xC2602803128069A7 +9D5A 2BD5 688E CB88 9DEB CD3F C260 2803 1280 69A7 +https://meejah.ca/meejah.asc + +jean-paul calderone +0x?? +fingerprint +[url for key] + +brian warner +0xD43B4C9C73225AAF +967E FE06 6998 7241 1A77 DF36 D43B 4C9C 7322 5AAF +http://www.lothar.com/warner-gpg.html \ No newline at end of file From 56337c442103e9b76c9ee7351cfa7260b850cf70 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 20 Dec 2020 20:29:00 -0700 Subject: [PATCH 133/201] better words --- docs/INSTALL.rst | 4 ++-- docs/developer-release-signatures | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/docs/INSTALL.rst b/docs/INSTALL.rst index 568869407..9c67e0ffe 100644 --- a/docs/INSTALL.rst +++ b/docs/INSTALL.rst @@ -197,8 +197,8 @@ Verifying Signatures -------------------- First download the source tarball and then any signatures. There are several -developers who are expected to produce signatures for a release. *At least -two signatures should be verified*. +developers who are able to produce signatures for a release. *At least two +signatures should be found and verified*. This statement, signed by the existing Tahoe release-signing key, attests to those developers authorized to sign a Tahoe release: diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures index d79d01fab..0d916cf6f 100644 --- a/docs/developer-release-signatures +++ b/docs/developer-release-signatures @@ -1,9 +1,12 @@ TODO: clear-sign this with the release key +TODO: update jean-paul's information +January 3, 2021 -Any two of the following core Tahoe contributers may sign a -release. They each independantly produce a signature which are made -available beside Tahoe releases after 1.15.0 +Any of the following core Tahoe contributers may sign a release. Each +release should be signed by at least two developers. They each +independantly produce a signature which are made available beside +Tahoe releases after 1.15.0 This statement is signed by the previous Tahoe release key. 
Any future such statements may be signed by it OR by any two developers (for @@ -22,4 +25,4 @@ fingerprint brian warner 0xD43B4C9C73225AAF 967E FE06 6998 7241 1A77 DF36 D43B 4C9C 7322 5AAF -http://www.lothar.com/warner-gpg.html \ No newline at end of file +http://www.lothar.com/warner-gpg.html From 848fac815b93c57002495b1e9566168db2615364 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 4 Jan 2021 14:06:20 -0700 Subject: [PATCH 134/201] spelling --- docs/developer-release-signatures | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures index 0d916cf6f..9f00662f4 100644 --- a/docs/developer-release-signatures +++ b/docs/developer-release-signatures @@ -5,7 +5,7 @@ January 3, 2021 Any of the following core Tahoe contributers may sign a release. Each release should be signed by at least two developers. They each -independantly produce a signature which are made available beside +independently produce a signature which are made available beside Tahoe releases after 1.15.0 This statement is signed by the previous Tahoe release key. Any future From a858d4a7cb9a128cb26f6d692c62e9b7627d5bac Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 4 Jan 2021 14:15:33 -0700 Subject: [PATCH 135/201] update exarkun's information --- docs/developer-release-signatures | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures index 9f00662f4..b2752e8ca 100644 --- a/docs/developer-release-signatures +++ b/docs/developer-release-signatures @@ -17,9 +17,9 @@ meejah 9D5A 2BD5 688E CB88 9DEB CD3F C260 2803 1280 69A7 https://meejah.ca/meejah.asc -jean-paul calderone -0x?? -fingerprint +jean-paul calderone (exarkun) +0xE27B085EDEAA4B1B +96B9 C5DA B2EA 9EB6 7941 9DB7 E27B 085E DEAA 4B1B [url for key] brian warner From 2a3d01a9cc077af2ffd965978b009544cb899ded Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 4 Jan 2021 14:22:09 -0700 Subject: [PATCH 136/201] url for exarkun's key --- docs/developer-release-signatures | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures index b2752e8ca..ba2a88dc9 100644 --- a/docs/developer-release-signatures +++ b/docs/developer-release-signatures @@ -20,7 +20,7 @@ https://meejah.ca/meejah.asc jean-paul calderone (exarkun) 0xE27B085EDEAA4B1B 96B9 C5DA B2EA 9EB6 7941 9DB7 E27B 085E DEAA 4B1B -[url for key] +http://pgp.mit.edu/pks/lookup?op=get&search=0xE27B085EDEAA4B1B brian warner 0xD43B4C9C73225AAF From 52c2e292d876606dd00191177080dc552d16d554 Mon Sep 17 00:00:00 2001 From: meejah Date: Mon, 4 Jan 2021 14:31:24 -0700 Subject: [PATCH 137/201] news --- newsfragments/3580.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3580.minor diff --git a/newsfragments/3580.minor b/newsfragments/3580.minor new file mode 100644 index 000000000..e69de29bb From ed9bc93571e44310dad6f9992012f0af0ac13524 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 5 Jan 2021 09:28:42 -0700 Subject: [PATCH 138/201] redundant newsfragment --- newsfragments/2920.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 newsfragments/2920.minor diff --git a/newsfragments/2920.minor b/newsfragments/2920.minor deleted file mode 100644 index e69de29bb..000000000 From 91de725d93a8810c98679e3cb8647f615064b9c8 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 5 Jan 2021 09:29:10 -0700 Subject: [PATCH 139/201] better url for 
exarkun's key --- docs/developer-release-signatures | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures index ba2a88dc9..5e93e4425 100644 --- a/docs/developer-release-signatures +++ b/docs/developer-release-signatures @@ -20,7 +20,7 @@ https://meejah.ca/meejah.asc jean-paul calderone (exarkun) 0xE27B085EDEAA4B1B 96B9 C5DA B2EA 9EB6 7941 9DB7 E27B 085E DEAA 4B1B -http://pgp.mit.edu/pks/lookup?op=get&search=0xE27B085EDEAA4B1B +https://twistedmatrix.com/~exarkun/E27B085EDEAA4B1B.asc brian warner 0xD43B4C9C73225AAF From a031e6a4b34c986258985410672706101f029d3d Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 5 Jan 2021 09:33:31 -0700 Subject: [PATCH 140/201] more realistic date, better info --- docs/developer-release-signatures | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures index 5e93e4425..6dd7303fb 100644 --- a/docs/developer-release-signatures +++ b/docs/developer-release-signatures @@ -1,7 +1,5 @@ -TODO: clear-sign this with the release key -TODO: update jean-paul's information -January 3, 2021 +January 8, 2021 Any of the following core Tahoe contributers may sign a release. Each release should be signed by at least two developers. They each From 9957790bb8b163e8e804d399068035c52c7a390a Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 20 Dec 2020 19:09:34 -0700 Subject: [PATCH 141/201] write verification instructions, and developer statement --- docs/INSTALL.rst | 5 +++-- docs/developer-release-signatures | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/INSTALL.rst b/docs/INSTALL.rst index 9c67e0ffe..59d0eb5ea 100644 --- a/docs/INSTALL.rst +++ b/docs/INSTALL.rst @@ -197,8 +197,9 @@ Verifying Signatures -------------------- First download the source tarball and then any signatures. There are several -developers who are able to produce signatures for a release. *At least two -signatures should be found and verified*. +developers who are expected to produce signatures for a release. Thus, a +release may have more than one signature. All signatures should be valid and +you should confirm at least one signature. This statement, signed by the existing Tahoe release-signing key, attests to those developers authorized to sign a Tahoe release: diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures index 6dd7303fb..2c9460738 100644 --- a/docs/developer-release-signatures +++ b/docs/developer-release-signatures @@ -23,4 +23,4 @@ https://twistedmatrix.com/~exarkun/E27B085EDEAA4B1B.asc brian warner 0xD43B4C9C73225AAF 967E FE06 6998 7241 1A77 DF36 D43B 4C9C 7322 5AAF -http://www.lothar.com/warner-gpg.html +https://www.lothar.com/warner-gpg.html From 3995c932ef7967a2131c6bc24af28cefef096ad5 Mon Sep 17 00:00:00 2001 From: meejah Date: Sun, 20 Dec 2020 20:29:00 -0700 Subject: [PATCH 142/201] better words --- docs/INSTALL.rst | 6 +++--- newsfragments/2920.minor | 0 2 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 newsfragments/2920.minor diff --git a/docs/INSTALL.rst b/docs/INSTALL.rst index 59d0eb5ea..e47d87bd6 100644 --- a/docs/INSTALL.rst +++ b/docs/INSTALL.rst @@ -197,9 +197,9 @@ Verifying Signatures -------------------- First download the source tarball and then any signatures. There are several -developers who are expected to produce signatures for a release. Thus, a -release may have more than one signature. 
All signatures should be valid and -you should confirm at least one signature. +developers who are able to produce signatures for a release. A release may +have multiple signatures. All should be valid and you should confirm at least +one of them (ideally, confirm all). This statement, signed by the existing Tahoe release-signing key, attests to those developers authorized to sign a Tahoe release: diff --git a/newsfragments/2920.minor b/newsfragments/2920.minor new file mode 100644 index 000000000..e69de29bb From 8c1c682fdd6747d308f0663ab9be3b7b6cdd2be2 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 5 Jan 2021 09:28:42 -0700 Subject: [PATCH 143/201] redundant newsfragment --- newsfragments/2920.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 newsfragments/2920.minor diff --git a/newsfragments/2920.minor b/newsfragments/2920.minor deleted file mode 100644 index e69de29bb..000000000 From 8aaf0ee36224b9fb35d800099c0b755c37278c99 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 19 Jan 2021 10:23:27 -0700 Subject: [PATCH 144/201] tweak statement --- docs/developer-release-signatures | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures index 2c9460738..8092fb436 100644 --- a/docs/developer-release-signatures +++ b/docs/developer-release-signatures @@ -1,12 +1,12 @@ -January 8, 2021 +January 20, 2021 Any of the following core Tahoe contributers may sign a release. Each -release should be signed by at least two developers. They each -independently produce a signature which are made available beside -Tahoe releases after 1.15.0 +release MUST be signed by at least one developer but MAY have +additional signatures. Each developer independently produces a +signature which is made available beside Tahoe releases after 1.15.0 -This statement is signed by the previous Tahoe release key. Any future +This statement is signed by the existing Tahoe release key. Any future such statements may be signed by it OR by any two developers (for example, to add or remove developers from the list). 
@@ -20,7 +20,7 @@ jean-paul calderone (exarkun) 96B9 C5DA B2EA 9EB6 7941 9DB7 E27B 085E DEAA 4B1B https://twistedmatrix.com/~exarkun/E27B085EDEAA4B1B.asc -brian warner -0xD43B4C9C73225AAF -967E FE06 6998 7241 1A77 DF36 D43B 4C9C 7322 5AAF +brian warner (lothar) +0x863333C265497810 +5810 F125 7F8C F753 7753 895A 8633 33C2 6549 7810 https://www.lothar.com/warner-gpg.html From 407014ec5b51da4e883457bdbae59145be53e1d5 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 19 Jan 2021 11:22:18 -0700 Subject: [PATCH 145/201] actually sign statement --- docs/developer-release-signatures | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/developer-release-signatures b/docs/developer-release-signatures index 8092fb436..1b55641d9 100644 --- a/docs/developer-release-signatures +++ b/docs/developer-release-signatures @@ -1,3 +1,6 @@ +-----BEGIN PGP SIGNED MESSAGE----- +Hash: SHA512 + January 20, 2021 @@ -24,3 +27,16 @@ brian warner (lothar) 0x863333C265497810 5810 F125 7F8C F753 7753 895A 8633 33C2 6549 7810 https://www.lothar.com/warner-gpg.html + + +-----BEGIN PGP SIGNATURE----- + +iQEzBAEBCgAdFiEE405i0G0Oac/KQXn/veDTHWhmanoFAmAHIyIACgkQveDTHWhm +anqhqQf/YSbMXL+gwFhAZsjX39EVlbr/Ik7WPPkJW7v1oHybTnwFpFIc52COU1x/ +sqRfk4OyYtz9IBgOPXoWgXu9R4qdK6vYKxEsekcGT9C5l0OyDz8YWXEWgbGK5mvI +aEub9WucD8r2uOQnnW6DtznFuEpvOjtf/+2BU767+bvLsbViW88ocbuLfCqLdOgD +WZT9j3M+Y2Dc56DAJzP/4fkrUSVIofZStYp5u9HBjburgcYIp0g/cyc4xXRoi6Mp +lFTRFv3MIjmoamzSQseoIgP6fi8QRqPrffPrsyqAp+06mJnPhxxFqxtO/ZErmpSa ++BGrLBxdWa8IF9U1A4Fs5nuAzAKMEg== +=E9J+ +-----END PGP SIGNATURE----- From 781deefcde2a45492c056fa0103ca4306ce5aa46 Mon Sep 17 00:00:00 2001 From: meejah Date: Tue, 19 Jan 2021 11:32:53 -0700 Subject: [PATCH 146/201] command-line to sign a tag with official key --- docs/release-checklist.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/release-checklist.rst b/docs/release-checklist.rst index 18c908a99..fedefee51 100644 --- a/docs/release-checklist.rst +++ b/docs/release-checklist.rst @@ -118,6 +118,12 @@ Did anyone contribute a hack since the last release? If so, then https://tahoe-lafs.org/hacktahoelafs/ needs to be updated. +Sign Git Tag +```````````` + +- git tag -s -u 0xE34E62D06D0E69CFCA4179FFBDE0D31D68666A7A -m "release Tahoe-LAFS-X.Y.Z" tahoe-lafs-X.Y.Z + + Upload Artifacts ```````````````` From 755de5edafe7589ecf2202a05d17a403913b31fd Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jan 2021 09:58:51 -0500 Subject: [PATCH 147/201] Start of passing tests on Python 3. 
--- src/allmydata/test/web/test_web.py | 32 ++++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py index 2f000b7a1..244478a55 100644 --- a/src/allmydata/test/web/test_web.py +++ b/src/allmydata/test/web/test_web.py @@ -275,13 +275,13 @@ class FakeClient(_Client): # type: ignore # tahoe-lafs/ticket/3573 node_config=EMPTY_CLIENT_CONFIG, ) # fake knowledge of another server - self.storage_broker.test_add_server("other_nodeid", + self.storage_broker.test_add_server(b"other_nodeid", FakeDisplayableServer( serverid=b"other_nodeid", nickname=u"other_nickname \u263B", connected = True, last_connect_time = 10, last_loss_time = 20, last_rx_time = 30)) - self.storage_broker.test_add_server("disconnected_nodeid", + self.storage_broker.test_add_server(b"disconnected_nodeid", FakeDisplayableServer( - serverid="disconnected_nodeid", nickname=u"disconnected_nickname \u263B", connected = False, + serverid=b"disconnected_nodeid", nickname=u"disconnected_nickname \u263B", connected = False, last_connect_time = None, last_loss_time = 25, last_rx_time = 35)) self.introducer_client = None self.history = FakeHistory() @@ -665,6 +665,8 @@ class MultiFormatResourceTests(TrialTestCase): Tests for ``MultiFormatResource``. """ def render(self, resource, **queryargs): + # Query arguments in real twisted.web requests have byte keys. + queryargs = {k.encode("utf-8"): v for (k, v) in queryargs.items()} return self.successResultOf(render(resource, queryargs)) def resource(self): @@ -675,13 +677,13 @@ class MultiFormatResourceTests(TrialTestCase): class Content(MultiFormatResource): def render_HTML(self, req): - return "html" + return b"html" def render_A(self, req): - return "a" + return b"a" def render_B(self, req): - return "b" + return b"b" return Content() @@ -693,7 +695,7 @@ class MultiFormatResourceTests(TrialTestCase): """ resource = self.resource() resource.formatArgument = "foo" - self.assertEqual("a", self.render(resource, foo=["a"])) + self.assertEqual(b"a", self.render(resource, foo=[b"a"])) def test_default_format_argument(self): @@ -702,7 +704,7 @@ class MultiFormatResourceTests(TrialTestCase): then the ``t`` argument is used. """ resource = self.resource() - self.assertEqual("a", self.render(resource, t=["a"])) + self.assertEqual(b"a", self.render(resource, t=[b"a"])) def test_no_format(self): @@ -711,7 +713,7 @@ class MultiFormatResourceTests(TrialTestCase): been defined, the base rendering behavior is used (``render_HTML``). """ resource = self.resource() - self.assertEqual("html", self.render(resource)) + self.assertEqual(b"html", self.render(resource)) def test_default_format(self): @@ -722,7 +724,7 @@ class MultiFormatResourceTests(TrialTestCase): """ resource = self.resource() resource.formatDefault = "b" - self.assertEqual("b", self.render(resource)) + self.assertEqual(b"b", self.render(resource)) def test_explicit_none_format_renderer(self): @@ -732,7 +734,7 @@ class MultiFormatResourceTests(TrialTestCase): """ resource = self.resource() resource.render_FOO = None - self.assertEqual("html", self.render(resource, t=["foo"])) + self.assertEqual(b"html", self.render(resource, t=[b"foo"])) def test_unknown_format(self): @@ -741,15 +743,15 @@ class MultiFormatResourceTests(TrialTestCase): returned. 
""" resource = self.resource() - response_body = self.render(resource, t=["foo"]) + response_body = self.render(resource, t=[b"foo"]) self.assertIn( - "400 - Bad Format", response_body, + b"400 - Bad Format", response_body, ) self.assertIn( - "Unknown t value:", response_body, + b"Unknown t value:", response_body, ) self.assertIn( - "'foo'", response_body, + b"'foo'", response_body, ) From 011b027c392ea1b3c182e419b1db91922a4f9465 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 22 Jan 2021 10:14:28 -0500 Subject: [PATCH 148/201] More passing tests on Python 3. --- src/allmydata/test/common.py | 14 ++--- src/allmydata/test/web/test_web.py | 95 +++++++++++++++--------------- 2 files changed, 56 insertions(+), 53 deletions(-) diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index fde92fb59..f7f339b98 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -566,12 +566,12 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation self.file_types[self.storage_index] = version initial_contents = self._get_initial_contents(contents) data = initial_contents.read(initial_contents.get_size()) - data = "".join(data) + data = b"".join(data) self.all_contents[self.storage_index] = data return defer.succeed(self) def _get_initial_contents(self, contents): if contents is None: - return MutableData("") + return MutableData(b"") if IMutableUploadable.providedBy(contents): return contents @@ -625,7 +625,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation def raise_error(self): pass def get_writekey(self): - return "\x00"*16 + return b"\x00"*16 def get_size(self): return len(self.all_contents[self.storage_index]) def get_current_size(self): @@ -644,7 +644,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation return self.file_types[self.storage_index] def check(self, monitor, verify=False, add_lease=False): - s = StubServer("\x00"*20) + s = StubServer(b"\x00"*20) r = CheckResults(self.my_uri, self.storage_index, healthy=True, recoverable=True, count_happiness=10, @@ -655,7 +655,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation count_recoverable_versions=1, count_unrecoverable_versions=0, servers_responding=[s], - sharemap={"seq1-abcd-sh0": [s]}, + sharemap={b"seq1-abcd-sh0": [s]}, count_wrong_shares=0, list_corrupt_shares=[], count_corrupt_shares=0, @@ -709,7 +709,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation def overwrite(self, new_contents): assert not self.is_readonly() new_data = new_contents.read(new_contents.get_size()) - new_data = "".join(new_data) + new_data = b"".join(new_data) self.all_contents[self.storage_index] = new_data return defer.succeed(None) def modify(self, modifier): @@ -740,7 +740,7 @@ class FakeMutableFileNode(object): # type: ignore # incomplete implementation def update(self, data, offset): assert not self.is_readonly() def modifier(old, servermap, first_time): - new = old[:offset] + "".join(data.read(data.get_size())) + new = old[:offset] + b"".join(data.read(data.get_size())) new += old[len(new):] return new return self.modify(modifier) diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py index 244478a55..bcf4db1bf 100644 --- a/src/allmydata/test/web/test_web.py +++ b/src/allmydata/test/web/test_web.py @@ -1,8 +1,11 @@ from __future__ import print_function -import os.path, re, urllib, time +from past.builtins import unicode + +import 
os.path, re, time import json import treq +from urllib.parse import quote as urlquote, unquote as urlunquote from bs4 import BeautifulSoup @@ -115,8 +118,8 @@ class FakeUploader(service.Service): servermap={}, timings={}, uri_extension_data={}, - uri_extension_hash="fake", - verifycapstr="fakevcap") + uri_extension_hash=b"fake", + verifycapstr=b"fakevcap") ur.set_uri(n.get_uri()) return ur d.addCallback(_got_data) @@ -297,12 +300,12 @@ class FakeClient(_Client): # type: ignore # tahoe-lafs/ticket/3573 self.addService(FakeStorageServer(self.nodeid, self.nickname)) def get_long_nodeid(self): - return "v0-nodeid" + return b"v0-nodeid" def get_long_tubid(self): - return "tubid" + return u"tubid" def get_auth_token(self): - return 'a fake debug auth token' + return b'a fake debug auth token' def startService(self): return service.MultiService.startService(self) @@ -340,7 +343,7 @@ class WebMixin(TimezoneMixin): def _then(res): self.public_root = res[0][1] assert interfaces.IDirectoryNode.providedBy(self.public_root), res - self.public_url = "/uri/" + self.public_root.get_uri() + self.public_url = "/uri/" + unicode(self.public_root.get_uri(), "ascii") self.private_root = res[1][1] foo = res[2][1] @@ -365,7 +368,7 @@ class WebMixin(TimezoneMixin): # mdmf self.QUUX_CONTENTS, n, self._quux_txt_uri, self._quux_txt_readonly_uri = self.makefile_mutable(0, mdmf=True) - assert self._quux_txt_uri.startswith("URI:MDMF") + assert self._quux_txt_uri.startswith(b"URI:MDMF") foo.set_uri(u"quux.txt", self._quux_txt_uri, self._quux_txt_readonly_uri) foo.set_uri(u"empty", res[3][1].get_uri(), @@ -382,7 +385,7 @@ class WebMixin(TimezoneMixin): # filenode to test for html encoding issues self._htmlname_unicode = u"<&weirdly'named\"file>>>_