Merge branch 'master' into 3917-refactor-test-storage-py

Fon E. Noel NFEBE 2023-03-11 20:54:12 +01:00
commit 4e6e78f19c
137 changed files with 23533 additions and 1889 deletions

.circleci/circleci.txt Normal file

@@ -0,0 +1,78 @@
# A master build looks like this:
# BASH_ENV=/tmp/.bash_env-63d018969ca480003a031e62-0-build
# CI=true
# CIRCLECI=true
# CIRCLE_BRANCH=master
# CIRCLE_BUILD_NUM=76545
# CIRCLE_BUILD_URL=https://circleci.com/gh/tahoe-lafs/tahoe-lafs/76545
# CIRCLE_JOB=NixOS 21.11
# CIRCLE_NODE_INDEX=0
# CIRCLE_NODE_TOTAL=1
# CIRCLE_PROJECT_REPONAME=tahoe-lafs
# CIRCLE_PROJECT_USERNAME=tahoe-lafs
# CIRCLE_REPOSITORY_URL=git@github.com:tahoe-lafs/tahoe-lafs.git
# CIRCLE_SHA1=ed0bda2d7456f4a2cd60870072e1fe79864a49a1
# CIRCLE_SHELL_ENV=/tmp/.bash_env-63d018969ca480003a031e62-0-build
# CIRCLE_USERNAME=alice
# CIRCLE_WORKFLOW_ID=6d9bb71c-be3a-4659-bf27-60954180619b
# CIRCLE_WORKFLOW_JOB_ID=0793c975-7b9f-489f-909b-8349b72d2785
# CIRCLE_WORKFLOW_WORKSPACE_ID=6d9bb71c-be3a-4659-bf27-60954180619b
# CIRCLE_WORKING_DIRECTORY=~/project
# A build of an in-repo PR looks like this:
# BASH_ENV=/tmp/.bash_env-63d1971a0298086d8841287e-0-build
# CI=true
# CIRCLECI=true
# CIRCLE_BRANCH=3946-less-chatty-downloads
# CIRCLE_BUILD_NUM=76612
# CIRCLE_BUILD_URL=https://circleci.com/gh/tahoe-lafs/tahoe-lafs/76612
# CIRCLE_JOB=NixOS 21.11
# CIRCLE_NODE_INDEX=0
# CIRCLE_NODE_TOTAL=1
# CIRCLE_PROJECT_REPONAME=tahoe-lafs
# CIRCLE_PROJECT_USERNAME=tahoe-lafs
# CIRCLE_PULL_REQUEST=https://github.com/tahoe-lafs/tahoe-lafs/pull/1251
# CIRCLE_PULL_REQUESTS=https://github.com/tahoe-lafs/tahoe-lafs/pull/1251
# CIRCLE_REPOSITORY_URL=git@github.com:tahoe-lafs/tahoe-lafs.git
# CIRCLE_SHA1=921a2083dcefdb5f431cdac195fc9ac510605349
# CIRCLE_SHELL_ENV=/tmp/.bash_env-63d1971a0298086d8841287e-0-build
# CIRCLE_USERNAME=bob
# CIRCLE_WORKFLOW_ID=5e32c12e-be37-4868-9fa8-6a6929fec2f1
# CIRCLE_WORKFLOW_JOB_ID=316ca408-81b4-4c96-bbdd-644e4c3e01e5
# CIRCLE_WORKFLOW_WORKSPACE_ID=5e32c12e-be37-4868-9fa8-6a6929fec2f1
# CIRCLE_WORKING_DIRECTORY=~/project
# CI_PULL_REQUEST=https://github.com/tahoe-lafs/tahoe-lafs/pull/1251
# A build of a PR from a fork looks like this:
# BASH_ENV=/tmp/.bash_env-63d40f7b2e89cd3de10e0db9-0-build
# CI=true
# CIRCLECI=true
# CIRCLE_BRANCH=pull/1252
# CIRCLE_BUILD_NUM=76678
# CIRCLE_BUILD_URL=https://circleci.com/gh/tahoe-lafs/tahoe-lafs/76678
# CIRCLE_JOB=NixOS 21.05
# CIRCLE_NODE_INDEX=0
# CIRCLE_NODE_TOTAL=1
# CIRCLE_PROJECT_REPONAME=tahoe-lafs
# CIRCLE_PROJECT_USERNAME=tahoe-lafs
# CIRCLE_PR_NUMBER=1252
# CIRCLE_PR_REPONAME=tahoe-lafs
# CIRCLE_PR_USERNAME=carol
# CIRCLE_PULL_REQUEST=https://github.com/tahoe-lafs/tahoe-lafs/pull/1252
# CIRCLE_PULL_REQUESTS=https://github.com/tahoe-lafs/tahoe-lafs/pull/1252
# CIRCLE_REPOSITORY_URL=git@github.com:tahoe-lafs/tahoe-lafs.git
# CIRCLE_SHA1=15c7916e0812e6baa2a931cd54b18f3382a8456e
# CIRCLE_SHELL_ENV=/tmp/.bash_env-63d40f7b2e89cd3de10e0db9-0-build
# CIRCLE_USERNAME=
# CIRCLE_WORKFLOW_ID=19c917c8-3a38-4b20-ac10-3265259fa03e
# CIRCLE_WORKFLOW_JOB_ID=58e95215-eccf-4664-a231-1dba7fd2d323
# CIRCLE_WORKFLOW_WORKSPACE_ID=19c917c8-3a38-4b20-ac10-3265259fa03e
# CIRCLE_WORKING_DIRECTORY=~/project
# CI_PULL_REQUEST=https://github.com/tahoe-lafs/tahoe-lafs/pull/1252
# A build of a PR from a fork where the owner has enabled CircleCI looks
# the same as a build of an in-repo PR, except it runs on the owner's
# CircleCI namespace.
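#
# One practical consequence of the differences above (see lib.sh in this
# directory): a build of a PR from a fork can be recognized because
# CIRCLE_PR_NUMBER is set, or because an element of CIRCLE_PULL_REQUESTS
# ends with the "pull/NNNN" value found in CIRCLE_BRANCH.  A minimal
# sketch of such a check, using only the variables shown above:
#
#   if [ -n "${CIRCLE_PR_NUMBER:-}" ]; then
#       echo "this is a build of a PR from a fork"
#   fi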


@@ -11,20 +11,60 @@
#
version: 2.1
# A template that can be shared between the two different image-building
# workflows.
.images: &IMAGES
jobs:
# Every job that pushes a Docker image to Docker Hub needs to provide
# credentials. Use this first job to define a yaml anchor that can be
# used to supply a CircleCI job context which makes Docker Hub credentials
# available in the environment.
#
# Contexts are managed in the CircleCI web interface:
#
# https://app.circleci.com/settings/organization/github/tahoe-lafs/contexts
- "build-image-debian-11": &DOCKERHUB_CONTEXT
<<: *DOCKERHUB_CONTEXT
- "build-image-ubuntu-20-04":
<<: *DOCKERHUB_CONTEXT
- "build-image-fedora-35":
<<: *DOCKERHUB_CONTEXT
- "build-image-oraclelinux-8":
<<: *DOCKERHUB_CONTEXT
# Restore later as PyPy38
#- "build-image-pypy27-buster":
# <<: *DOCKERHUB_CONTEXT
parameters:
# Control whether the image-building workflow runs as part of this pipeline.
# Generally we do not want this to run because we don't need our
# dependencies to move around all the time and because building the image
# takes a couple minutes.
#
# An easy way to trigger a pipeline with this set to true is with the
# rebuild-images.sh tool in this directory. You can also do so via the
# CircleCI web UI.
build-images:
default: false
type: "boolean"
# Control whether the test-running workflow runs as part of this pipeline.
# Generally we do want this to run because running the tests is the primary
# purpose of this pipeline.
run-tests:
default: true
type: "boolean"
workflows:
ci:
when: "<< pipeline.parameters.run-tests >>"
jobs:
# Start with jobs testing various platforms.
- "debian-10":
{}
- "debian-11":
{}
- "ubuntu-20-04":
{}
- "ubuntu-18-04":
requires:
- "ubuntu-20-04"
# Equivalent to RHEL 8; CentOS 8 is dead.
- "oraclelinux-8":
@@ -54,6 +94,9 @@ workflows:
{}
- "integration":
# Run even the slow integration tests here. We need the `--` to
# sneak past tox and get to pytest.
tox-args: "-- --runslow integration"
requires:
# If the unit test suite doesn't pass, don't bother running the
# integration tests.
@@ -65,41 +108,10 @@ workflows:
{}
images:
# Build the Docker images used by the ci jobs. This makes the ci jobs
# faster and takes various spurious failures out of the critical path.
triggers:
# Build once a day
- schedule:
cron: "0 0 * * *"
filters:
branches:
only:
- "master"
<<: *IMAGES
jobs:
# Every job that pushes a Docker image to Docker Hub needs to provide
# credentials. Use this first job to define a yaml anchor that can be
# used to supply a CircleCI job context which makes Docker Hub
# credentials available in the environment.
#
# Contexts are managed in the CircleCI web interface:
#
# https://app.circleci.com/settings/organization/github/tahoe-lafs/contexts
- "build-image-debian-10": &DOCKERHUB_CONTEXT
context: "dockerhub-auth"
- "build-image-debian-11":
<<: *DOCKERHUB_CONTEXT
- "build-image-ubuntu-18-04":
<<: *DOCKERHUB_CONTEXT
- "build-image-ubuntu-20-04":
<<: *DOCKERHUB_CONTEXT
- "build-image-fedora-35":
<<: *DOCKERHUB_CONTEXT
- "build-image-oraclelinux-8":
<<: *DOCKERHUB_CONTEXT
# Restore later as PyPy38
#- "build-image-pypy27-buster":
# <<: *DOCKERHUB_CONTEXT
# Build as part of the workflow but only if requested.
when: "<< pipeline.parameters.build-images >>"
jobs:
@@ -133,10 +145,10 @@ jobs:
steps:
- "checkout"
- run:
- run: &INSTALL_TOX
name: "Install tox"
command: |
pip install --user tox
pip install --user 'tox~=3.0'
- run:
name: "Static-ish code checks"
@@ -152,9 +164,7 @@ jobs:
- "checkout"
- run:
name: "Install tox"
command: |
pip install --user tox
<<: *INSTALL_TOX
- run:
name: "Make PyInstaller executable"
@@ -169,12 +179,7 @@ jobs:
command: |
dist/Tahoe-LAFS/tahoe --version
debian-10: &DEBIAN
docker:
- <<: *DOCKERHUB_AUTH
image: "tahoelafsci/debian:10-py3.7"
user: "nobody"
debian-11: &DEBIAN
environment: &UTF_8_ENVIRONMENT
# In general, the test suite is not allowed to fail while the job
# succeeds. But you can set this to "yes" if you want it to be
@@ -186,7 +191,7 @@ jobs:
# filenames and argv).
LANG: "en_US.UTF-8"
# Select a tox environment to run for this job.
TAHOE_LAFS_TOX_ENVIRONMENT: "py37"
TAHOE_LAFS_TOX_ENVIRONMENT: "py39"
# Additional arguments to pass to tox.
TAHOE_LAFS_TOX_ARGS: ""
# The path in which test artifacts will be placed.
@@ -254,15 +259,11 @@ jobs:
/tmp/venv/bin/codecov
fi
debian-11:
<<: *DEBIAN
docker:
- <<: *DOCKERHUB_AUTH
image: "tahoelafsci/debian:11-py3.9"
user: "nobody"
environment:
<<: *UTF_8_ENVIRONMENT
TAHOE_LAFS_TOX_ENVIRONMENT: "py39"
# Restore later using PyPy3.8
# pypy27-buster:
@@ -296,6 +297,14 @@ jobs:
integration:
<<: *DEBIAN
parameters:
tox-args:
description: >-
Additional arguments to pass to the tox command.
type: "string"
default: ""
docker:
- <<: *DOCKERHUB_AUTH
image: "tahoelafsci/debian:11-py3.9"
@@ -308,28 +317,15 @@ jobs:
# Disable artifact collection because py.test can't produce any.
ARTIFACTS_OUTPUT_PATH: ""
# Pass on anything we got in our parameters.
TAHOE_LAFS_TOX_ARGS: "<< parameters.tox-args >>"
steps:
- "checkout"
# DRY, YAML-style. See the debian-9 steps.
- run: *SETUP_VIRTUALENV
- run: *RUN_TESTS
ubuntu-18-04: &UBUNTU_18_04
<<: *DEBIAN
docker:
- <<: *DOCKERHUB_AUTH
image: "tahoelafsci/ubuntu:18.04-py3.7"
user: "nobody"
environment:
<<: *UTF_8_ENVIRONMENT
# The default trial args include --rterrors which is incompatible with
# this reporter on Python 3. So drop that and just specify the
# reporter.
TAHOE_LAFS_TRIAL_ARGS: "--reporter=subunitv2-file"
TAHOE_LAFS_TOX_ENVIRONMENT: "py37"
ubuntu-20-04:
<<: *DEBIAN
docker:
@@ -382,7 +378,7 @@ jobs:
docker:
# Run in a highly Nix-capable environment.
- <<: *DOCKERHUB_AUTH
image: "nixos/nix:2.3.16"
image: "nixos/nix:2.10.3"
environment:
# CACHIX_AUTH_TOKEN is manually set in the CircleCI web UI and
@@ -392,27 +388,21 @@ jobs:
steps:
- "run":
# The nixos/nix image does not include ssh. Install it so the
# `checkout` step will succeed. We also want cachix for
# Nix-friendly caching.
# Get cachix for Nix-friendly caching.
name: "Install Basic Dependencies"
command: |
NIXPKGS="https://github.com/nixos/nixpkgs/archive/nixos-<<parameters.nixpkgs>>.tar.gz"
nix-env \
--file https://github.com/nixos/nixpkgs/archive/nixos-<<parameters.nixpkgs>>.tar.gz \
--file $NIXPKGS \
--install \
-A openssh cachix bash
-A cachix bash
# Activate it for "binary substitution". This sets up
# configuration that lets Nix download something from the cache
# instead of building it locally, if possible.
cachix use "${CACHIX_NAME}"
- "checkout"
- run:
name: "Cachix setup"
# Record the store paths that exist before we did much. There's no
# reason to cache these, they're either in the image or have to be
# retrieved before we can use cachix to restore from cache.
command: |
cachix use "${CACHIX_NAME}"
nix path-info --all > /tmp/store-path-pre-build
- "run":
# The Nix package doesn't know how to do this part, unfortunately.
name: "Generate version"
@@ -434,55 +424,26 @@ jobs:
# build a couple simple little dependencies that don't take
# advantage of multiple cores and we get a little speedup by doing
# them in parallel.
nix-build --cores 3 --max-jobs 2 --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>"
source .circleci/lib.sh
cache_if_able nix-build \
--cores 3 \
--max-jobs 2 \
--argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>"
- "run":
name: "Test"
command: |
# Let it go somewhat wild for the test suite itself
nix-build --cores 8 --argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>" tests.nix
- run:
# Send any new store objects to cachix.
name: "Push to Cachix"
when: "always"
command: |
# Cribbed from
# https://circleci.com/blog/managing-secrets-when-you-have-pull-requests-from-outside-contributors/
if [ -n "$CIRCLE_PR_NUMBER" ]; then
# I'm sure you're thinking "CIRCLE_PR_NUMBER must just be the
# number of the PR being built". Sorry, dear reader, you have
# guessed poorly. It is also conditionally set based on whether
# this is a PR from a fork or not.
#
# https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables
echo "Skipping Cachix push for forked PR."
else
# If this *isn't* a build from a fork then we have the Cachix
# write key in our environment and we can push any new objects
# to Cachix.
#
# To decide what to push, we inspect the list of store objects
# that existed before and after we did most of our work. Any
# that are new after the work is probably a useful thing to have
# around so push it to the cache. We exclude all derivation
# objects (.drv files) because they're cheap to reconstruct and
# by the time you know their cache key you've already done all
# the work anyway.
#
# This shell expression for finding the objects and pushing them
# was from the Cachix docs:
#
# https://docs.cachix.org/continuous-integration-setup/circleci.html
#
# but they seem to have removed it now.
bash -c "comm -13 <(sort /tmp/store-path-pre-build | grep -v '\.drv$') <(nix path-info --all | grep -v '\.drv$' | sort) | cachix push $CACHIX_NAME"
fi
source .circleci/lib.sh
cache_if_able nix-build \
--cores 8 \
--argstr pkgsVersion "nixpkgs-<<parameters.nixpkgs>>" \
tests.nix
typechecks:
docker:
- <<: *DOCKERHUB_AUTH
image: "tahoelafsci/ubuntu:18.04-py3.7"
image: "tahoelafsci/ubuntu:20.04-py3.9"
steps:
- "checkout"
@@ -494,7 +455,7 @@ jobs:
docs:
docker:
- <<: *DOCKERHUB_AUTH
image: "tahoelafsci/ubuntu:18.04-py3.7"
image: "tahoelafsci/ubuntu:20.04-py3.9"
steps:
- "checkout"
@@ -545,15 +506,6 @@ jobs:
docker push tahoelafsci/${DISTRO}:${TAG}-py${PYTHON_VERSION}
build-image-debian-10:
<<: *BUILD_IMAGE
environment:
DISTRO: "debian"
TAG: "10"
PYTHON_VERSION: "3.7"
build-image-debian-11:
<<: *BUILD_IMAGE
@@ -562,14 +514,6 @@ jobs:
TAG: "11"
PYTHON_VERSION: "3.9"
build-image-ubuntu-18-04:
<<: *BUILD_IMAGE
environment:
DISTRO: "ubuntu"
TAG: "18.04"
PYTHON_VERSION: "3.7"
build-image-ubuntu-20-04:
<<: *BUILD_IMAGE

.circleci/lib.sh Normal file

@@ -0,0 +1,119 @@
# Run a command, enabling cache writes to cachix if possible. The command is
# accepted as a variable number of positional arguments (like argv).
function cache_if_able() {
# Dump some info about our build environment.
describe_build
if is_cache_writeable; then
# If the cache is available we'll use it. This lets fork owners set
# up their own caching if they want.
echo "Cachix credentials present; will attempt to write to cache."
# The `cachix watch-exec ...` does our cache population. When it sees
# something added to the store (I guess) it pushes it to the named
# cache.
cachix watch-exec "${CACHIX_NAME}" -- "$@"
else
if is_cache_required; then
echo "Required credentials (CACHIX_AUTH_TOKEN) are missing."
return 1
else
echo "Cachix credentials missing; will not attempt cache writes."
"$@"
fi
fi
}
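# A hypothetical invocation, mirroring how the build steps in config.yml
# use this helper:
#
#   source .circleci/lib.sh
#   cache_if_able nix-build --cores 3 tests.nix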
function is_cache_writeable() {
# We can only *push* to the cache if we have a CACHIX_AUTH_TOKEN. In-repo
# jobs will get this from CircleCI configuration but jobs from forks may
# not.
[ -v CACHIX_AUTH_TOKEN ]
}
function is_cache_required() {
# If we're building in tahoe-lafs/tahoe-lafs then we must use the cache.
# If we're building anything from a fork then we're allowed to not have
# the credentials.
is_upstream
}
# Return success if the origin of this build is the tahoe-lafs/tahoe-lafs
# repository itself (and so we expect to have cache credentials available),
# failure otherwise.
#
# See circleci.txt for notes about how this determination is made.
function is_upstream() {
# CIRCLE_PROJECT_USERNAME is set to the org the build is happening for.
# If a PR targets a fork of the repo then this is set to something other
# than "tahoe-lafs".
[ "$CIRCLE_PROJECT_USERNAME" == "tahoe-lafs" ] &&
# CIRCLE_BRANCH is set to the real branch name for in-repo PRs and
# "pull/NNNN" for pull requests from forks.
#
# CIRCLE_PULL_REQUESTS is set to a comma-separated list of the full
# URLs of the PR pages which share an underlying branch, with one of
# them ending with that same "pull/NNNN" for PRs from forks.
! any_element_endswith "/$CIRCLE_BRANCH" "," "$CIRCLE_PULL_REQUESTS"
}
# Return success if splitting $3 on $2 results in an array with any element
# that ends with $1, failure otherwise.
function any_element_endswith() {
suffix=$1
shift
sep=$1
shift
haystack=$1
shift
IFS="${sep}" read -r -a elements <<< "$haystack"
for elem in "${elements[@]}"; do
if endswith "$suffix" "$elem"; then
return 0
fi
done
return 1
}
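# For example (haystack invented for illustration, patterned on the
# CIRCLE_PULL_REQUESTS values recorded in circleci.txt), this call succeeds
# because the second element ends with "/pull/1252":
#
#   any_element_endswith "/pull/1252" "," \
#       "https://github.com/tahoe-lafs/tahoe-lafs/pull/1251,https://github.com/tahoe-lafs/tahoe-lafs/pull/1252"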
# Return success if $2 ends with $1, failure otherwise.
function endswith() {
suffix=$1
shift
haystack=$1
shift
case "$haystack" in
*${suffix})
return 0
;;
*)
return 1
;;
esac
}
function describe_build() {
echo "Building PR for user/org: ${CIRCLE_PROJECT_USERNAME}"
echo "Building branch: ${CIRCLE_BRANCH}"
if is_upstream; then
echo "Upstream build."
else
echo "Non-upstream build."
fi
if is_cache_required; then
echo "Cache is required."
else
echo "Cache not required."
fi
if is_cache_writeable; then
echo "Cache is writeable."
else
echo "Cache not writeable."
fi
}


@@ -9,7 +9,7 @@ BASIC_DEPS="pip wheel"
# Python packages we need to support the test infrastructure. *Not* packages
# Tahoe-LAFS itself (implementation or test suite) need.
TEST_DEPS="tox codecov"
TEST_DEPS="tox~=3.0 codecov"
# Python packages we need to generate test reports for CI infrastructure.
# *Not* packages Tahoe-LAFS itself (implementation or test suite) need.

.circleci/rebuild-images.sh Executable file

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
set -euo pipefail
# Get your API token here:
# https://app.circleci.com/settings/user/tokens
API_TOKEN=$1
shift
# Name the branch you want to trigger the build for
BRANCH=$1
shift
curl \
--verbose \
--request POST \
--url https://circleci.com/api/v2/project/gh/tahoe-lafs/tahoe-lafs/pipeline \
--header "Circle-Token: $API_TOKEN" \
--header "content-type: application/json" \
--data '{"branch":"'"$BRANCH"'","parameters":{"build-images":true,"run-tests":false}}'
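# Example invocation (the token value is hypothetical; any branch name may
# be given as the second argument):
#
#   .circleci/rebuild-images.sh "$MY_CIRCLECI_API_TOKEN" master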


@@ -45,14 +45,15 @@ fi
# A prefix for the test command that ensures it will exit after no more than a
# certain amount of time. Ideally, we would only enforce a "silent" period
# timeout but there isn't obviously a ready-made tool for that. The test
# suite only takes about 5 - 6 minutes on CircleCI right now. 15 minutes
# seems like a moderately safe window.
# timeout but there isn't obviously a ready-made tool for that. The unit test
# suite only takes about 5 - 6 minutes on CircleCI right now. The integration
# tests are a bit longer than that. 45 minutes seems like a moderately safe
# window.
#
# This is primarily aimed at catching hangs on the PyPy job which runs for
# about 21 minutes and then gets killed by CircleCI in a way that fails the
# job and bypasses our "allowed failure" logic.
TIMEOUT="timeout --kill-after 1m 25m"
TIMEOUT="timeout --kill-after 1m 45m"
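# With GNU coreutils timeout, the prefix above means: send SIGTERM to the
# test command if it is still running after 45 minutes, then SIGKILL one
# minute later if it still has not exited.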
# Run the test suite as a non-root user. This is the expected usage; some
# small areas of the test suite assume non-root privileges (such as unreadable


@@ -6,6 +6,16 @@ on:
- "master"
pull_request:
# At the start of each workflow run, GitHub creates a unique
# GITHUB_TOKEN secret to use in the workflow. It is a good idea for
# this GITHUB_TOKEN to have the minimum of permissions. See:
#
# - https://docs.github.com/en/actions/security-guides/automatic-token-authentication
# - https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#permissions
#
permissions:
contents: read
# Control to what degree jobs in this workflow will run concurrently with
# other instances of themselves.
#
@@ -38,73 +48,67 @@ jobs:
- windows-latest
- ubuntu-latest
python-version:
- "3.7"
- "3.8"
- "3.9"
- "3.10"
- "3.11"
include:
# On macOS don't bother with 3.7-3.8, just to get faster builds.
# On macOS don't bother with 3.8, just to get faster builds.
- os: macos-latest
python-version: "3.9"
- os: macos-latest
python-version: "3.10"
python-version: "3.11"
# We only support PyPy on Linux at the moment.
- os: ubuntu-latest
python-version: "pypy-3.7"
- os: ubuntu-latest
python-version: "pypy-3.8"
- os: ubuntu-latest
python-version: "pypy-3.9"
steps:
# See https://github.com/actions/checkout. A fetch-depth of 0
# fetches all tags and branches.
- name: Check out Tahoe-LAFS sources
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
# To use pip caching with GitHub Actions in an OS-independent
# manner, we need `pip cache dir` command, which became
# available since pip v20.1+. At the time of writing this,
# GitHub Actions offers pip v20.3.3 for both ubuntu-latest and
# windows-latest, and pip v20.3.1 for macos-latest.
- name: Get pip cache directory
id: pip-cache
run: |
echo "::set-output name=dir::$(pip cache dir)"
# See https://github.com/actions/cache
- name: Use pip cache
uses: actions/cache@v2
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
restore-keys: |
${{ runner.os }}-pip-
cache: 'pip' # caching pip dependencies
- name: Install Python packages
run: |
pip install --upgrade codecov tox tox-gh-actions setuptools
pip install --upgrade codecov "tox<4" tox-gh-actions setuptools
pip list
- name: Display tool versions
run: python misc/build_helpers/show-tool-versions.py
- name: Run tox for corresponding Python version
if: ${{ !contains(matrix.os, 'windows') }}
run: python -m tox
# On Windows, a non-blocking pipe might respond (when emulating Unix-y
# API) with ENOSPC to indicate buffer full. Trial doesn't handle this
# well, so it breaks test runs. To attempt to solve this, we pipe the
# output through passthrough.py that will hopefully be able to do the right
# thing by using Windows APIs.
- name: Run tox for corresponding Python version
if: ${{ contains(matrix.os, 'windows') }}
run: |
pip install twisted pywin32
python -m tox | python misc/windows-enospc/passthrough.py
- name: Upload eliot.log
uses: actions/upload-artifact@v1
uses: actions/upload-artifact@v3
with:
name: eliot.log
path: eliot.log
- name: Upload trial log
uses: actions/upload-artifact@v1
uses: actions/upload-artifact@v3
with:
name: test.log
path: _trial_temp/test.log
@@ -161,21 +165,22 @@ jobs:
strategy:
fail-fast: false
matrix:
os:
- windows-latest
- ubuntu-latest
python-version:
- 3.7
- 3.9
include:
# On macOS don't bother with 3.7, just to get faster builds.
- os: macos-latest
python-version: 3.9
python-version: "3.9"
force-foolscap: false
- os: windows-latest
python-version: "3.9"
force-foolscap: false
# 22.04 has some issue with Tor at the moment:
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3943
- os: ubuntu-20.04
python-version: "3.11"
force-foolscap: false
steps:
- name: Install Tor [Ubuntu]
if: matrix.os == 'ubuntu-latest'
if: ${{ contains(matrix.os, 'ubuntu') }}
run: sudo apt install tor
# TODO: See https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3744.
@@ -188,51 +193,51 @@ jobs:
- name: Install Tor [Windows]
if: matrix.os == 'windows-latest'
uses: crazy-max/ghaction-chocolatey@v1
uses: crazy-max/ghaction-chocolatey@v2
with:
args: install tor
- name: Check out Tahoe-LAFS sources
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Get pip cache directory
id: pip-cache
run: |
echo "::set-output name=dir::$(pip cache dir)"
- name: Use pip cache
uses: actions/cache@v2
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
restore-keys: |
${{ runner.os }}-pip-
cache: 'pip' # caching pip dependencies
- name: Install Python packages
run: |
pip install --upgrade tox
pip install --upgrade "tox<4"
pip list
- name: Display tool versions
run: python misc/build_helpers/show-tool-versions.py
- name: Run "Python 3 integration tests"
if: "${{ !matrix.force-foolscap }}"
env:
# On macOS this is necessary to ensure unix socket paths for tor
# aren't too long. On Windows tox won't pass it through so it has no
# effect. On Linux it doesn't make a difference one way or another.
TMPDIR: "/tmp"
run: tox -e integration
run: |
tox -e integration
- name: Run "Python 3 integration tests (force Foolscap)"
if: "${{ matrix.force-foolscap }}"
env:
# On macOS this is necessary to ensure unix socket paths for tor
# aren't too long. On Windows tox won't pass it through so it has no
# effect. On Linux it doesn't make a difference one way or another.
TMPDIR: "/tmp"
run: |
tox -e integration -- --force-foolscap integration/
- name: Upload eliot.log in case of failure
uses: actions/upload-artifact@v1
uses: actions/upload-artifact@v3
if: failure()
with:
name: integration.eliot.json
@@ -253,31 +258,19 @@ jobs:
steps:
- name: Check out Tahoe-LAFS sources
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Get pip cache directory
id: pip-cache
run: |
echo "::set-output name=dir::$(pip cache dir)"
- name: Use pip cache
uses: actions/cache@v2
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py') }}
restore-keys: |
${{ runner.os }}-pip-
cache: 'pip' # caching pip dependencies
- name: Install Python packages
run: |
pip install --upgrade tox
pip install --upgrade "tox<4"
pip list
- name: Display tool versions
@@ -291,7 +284,7 @@ jobs:
run: dist/Tahoe-LAFS/tahoe --version
- name: Upload PyInstaller package
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: Tahoe-LAFS-${{ matrix.os }}-Python-${{ matrix.python-version }}
path: dist/Tahoe-LAFS-*-*.*


@@ -1,10 +0,0 @@
FROM python:2.7
ADD . /tahoe-lafs
RUN \
cd /tahoe-lafs && \
git pull --depth=100 && \
pip install . && \
rm -rf ~/.cache/
WORKDIR /root


@@ -1,25 +0,0 @@
FROM debian:9
LABEL maintainer "gordon@leastauthority.com"
RUN apt-get update
RUN DEBIAN_FRONTEND=noninteractive apt-get -yq upgrade
RUN DEBIAN_FRONTEND=noninteractive apt-get -yq install build-essential python-dev libffi-dev libssl-dev python-virtualenv git
RUN \
git clone https://github.com/tahoe-lafs/tahoe-lafs.git /root/tahoe-lafs; \
cd /root/tahoe-lafs; \
virtualenv --python=python2.7 venv; \
./venv/bin/pip install --upgrade setuptools; \
./venv/bin/pip install --editable .; \
./venv/bin/tahoe --version;
RUN \
cd /root; \
mkdir /root/.tahoe-client; \
mkdir /root/.tahoe-introducer; \
mkdir /root/.tahoe-server;
RUN /root/tahoe-lafs/venv/bin/tahoe create-introducer --location=tcp:introducer:3458 --port=tcp:3458 /root/.tahoe-introducer
RUN /root/tahoe-lafs/venv/bin/tahoe start /root/.tahoe-introducer
RUN /root/tahoe-lafs/venv/bin/tahoe create-node --location=tcp:server:3457 --port=tcp:3457 --introducer=$(cat /root/.tahoe-introducer/private/introducer.furl) /root/.tahoe-server
RUN /root/tahoe-lafs/venv/bin/tahoe create-client --webport=3456 --introducer=$(cat /root/.tahoe-introducer/private/introducer.furl) --basedir=/root/.tahoe-client --shares-needed=1 --shares-happy=1 --shares-total=1
VOLUME ["/root/.tahoe-client", "/root/.tahoe-server", "/root/.tahoe-introducer"]
EXPOSE 3456 3457 3458
ENTRYPOINT ["/root/tahoe-lafs/venv/bin/tahoe"]
CMD []


@@ -56,7 +56,7 @@ Once ``tahoe --version`` works, see `How to Run Tahoe-LAFS <docs/running.rst>`__
🐍 Python 2
-----------
Python 3.7 or later is now required.
Python 3.8 or later is required.
If you are still using Python 2.7, use Tahoe-LAFS version 1.17.1.


@@ -0,0 +1,138 @@
"""
First attempt at benchmarking uploads and downloads.
To run:
$ pytest benchmarks/upload_download.py -s -v -Wignore
To add e.g. 60ms of round-trip latency on Linux (the 30ms delay below applies in each direction):
$ tc qdisc add dev lo root netem delay 30ms
To reset:
$ tc qdisc del dev lo root netem
Frequency scaling can spoil the results.
To see the range of frequency scaling on a Linux system:
$ cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_available_frequencies
And to pin the CPU frequency to the lower bound found in these files:
$ sudo cpupower frequency-set -f <lowest available frequency>
TODO Parameterization (pytest?)
- Foolscap vs not foolscap
- Number of nodes
- Data size
- Number of needed/happy/total shares.
CAVEATS: The goal here isn't a realistic benchmark, or a benchmark that will be
measured over time, or one that is expected to be maintained over time. This is just
a quick and easy way to measure the speed of certain operations, compare HTTP
and Foolscap, and see the short-term impact of changes.
Eventually this will be replaced by a real benchmark suite that can be run over
time to measure something more meaningful.
"""
from time import time, process_time
from contextlib import contextmanager
from tempfile import mkdtemp
import os
from twisted.trial.unittest import TestCase
from twisted.internet.defer import gatherResults
from allmydata.util.deferredutil import async_to_deferred
from allmydata.util.consumer import MemoryConsumer
from allmydata.test.common_system import SystemTestMixin
from allmydata.immutable.upload import Data as UData
from allmydata.mutable.publish import MutableData
@contextmanager
def timeit(name):
start = time()
start_cpu = process_time()
try:
yield
finally:
print(
f"{name}: {time() - start:.3f} elapsed, {process_time() - start_cpu:.3f} CPU"
)
class ImmutableBenchmarks(SystemTestMixin, TestCase):
"""Benchmarks for immutables."""
# To use Foolscap, change to True:
FORCE_FOOLSCAP_FOR_STORAGE = False
# Don't reduce HTTP connection timeouts, that messes up the more aggressive
# benchmarks:
REDUCE_HTTP_CLIENT_TIMEOUT = False
@async_to_deferred
async def setUp(self):
SystemTestMixin.setUp(self)
self.basedir = os.path.join(mkdtemp(), "nodes")
# 2 nodes
await self.set_up_nodes(2)
# 1 share
for c in self.clients:
c.encoding_params["k"] = 1
c.encoding_params["happy"] = 1
c.encoding_params["n"] = 1
print()
@async_to_deferred
async def test_upload_and_download_immutable(self):
# To test larger files, change this:
DATA = b"Some data to upload\n" * 10
for i in range(5):
# 1. Upload:
with timeit(" upload"):
uploader = self.clients[0].getServiceNamed("uploader")
results = await uploader.upload(UData(DATA, convergence=None))
# 2. Download:
with timeit("download"):
uri = results.get_uri()
node = self.clients[1].create_node_from_uri(uri)
mc = await node.read(MemoryConsumer(), 0, None)
self.assertEqual(b"".join(mc.chunks), DATA)
@async_to_deferred
async def test_upload_and_download_mutable(self):
# To test larger files, change this:
DATA = b"Some data to upload\n" * 10
for i in range(5):
# 1. Upload:
with timeit(" upload"):
result = await self.clients[0].create_mutable_file(MutableData(DATA))
# 2. Download:
with timeit("download"):
data = await result.download_best_version()
self.assertEqual(data, DATA)
@async_to_deferred
async def test_upload_mutable_in_parallel(self):
# To test larger files, change this:
DATA = b"Some data to upload\n" * 1_000_000
with timeit(" upload"):
await gatherResults([
self.clients[0].create_mutable_file(MutableData(DATA))
for _ in range(20)
])


@@ -29,7 +29,7 @@ in
, pypiData ? sources.pypi-deps-db # the pypi package database snapshot to use
# for dependency resolution
, pythonVersion ? "python37" # a string choosing the python derivation from
, pythonVersion ? "python39" # a string choosing the python derivation from
# nixpkgs to target
, extras ? [ "tor" "i2p" ] # a list of strings identifying tahoe-lafs extras,


@@ -1,49 +0,0 @@
version: '2'
services:
client:
build:
context: .
dockerfile: ./Dockerfile.dev
volumes:
- ./misc:/root/tahoe-lafs/misc
- ./integration:/root/tahoe-lafs/integration
- ./src:/root/tahoe-lafs/static
- ./setup.cfg:/root/tahoe-lafs/setup.cfg
- ./setup.py:/root/tahoe-lafs/setup.py
ports:
- "127.0.0.1:3456:3456"
depends_on:
- "introducer"
- "server"
entrypoint: /root/tahoe-lafs/venv/bin/tahoe
command: ["run", "/root/.tahoe-client"]
server:
build:
context: .
dockerfile: ./Dockerfile.dev
volumes:
- ./misc:/root/tahoe-lafs/misc
- ./integration:/root/tahoe-lafs/integration
- ./src:/root/tahoe-lafs/static
- ./setup.cfg:/root/tahoe-lafs/setup.cfg
- ./setup.py:/root/tahoe-lafs/setup.py
ports:
- "127.0.0.1:3457:3457"
depends_on:
- "introducer"
entrypoint: /root/tahoe-lafs/venv/bin/tahoe
command: ["run", "/root/.tahoe-server"]
introducer:
build:
context: .
dockerfile: ./Dockerfile.dev
volumes:
- ./misc:/root/tahoe-lafs/misc
- ./integration:/root/tahoe-lafs/integration
- ./src:/root/tahoe-lafs/static
- ./setup.cfg:/root/tahoe-lafs/setup.cfg
- ./setup.py:/root/tahoe-lafs/setup.py
ports:
- "127.0.0.1:3458:3458"
entrypoint: /root/tahoe-lafs/venv/bin/tahoe
command: ["run", "/root/.tahoe-introducer"]


@@ -980,6 +980,9 @@ the node will not use an Introducer at all.
Such "introducerless" clients must be configured with static servers (described
below), or they will not be able to upload and download files.
.. _server_list:
Static Server Definitions
=========================


@@ -32,6 +32,7 @@ Contents:
gpg-setup
servers
managed-grid
helper
convergence-secret
garbage-collection

docs/managed-grid.rst Normal file

@@ -0,0 +1,342 @@
Managed Grid
============
This document explains the "Grid Manager" concept and the
`grid-manager` command. Someone operating a grid may choose to use a
Grid Manager; operators of storage-servers and clients then need some
additional configuration.
Overview and Motivation
-----------------------
In a grid using an Introducer, a client will use any storage-server
the Introducer announces (and the Introducer will announce any
storage-server that connects to it). This means that anyone with the
Introducer fURL can connect storage to the grid.
Sometimes, this is just what you want!
For some use-cases, though, you want to have clients only use certain
servers. One case might be a "managed" grid, where some entity runs
the grid; clients of this grid don't want their uploads to go to
"unmanaged" storage if some other client decides to provide storage.
One way to limit which storage servers a client connects to is via the
"server list" (:ref:`server_list`) (aka "Introducerless"
mode). Clients are given static lists of storage-servers, and connect
only to those. However, this means manually updating those lists
whenever the storage servers change.
Another method is for clients to use the `[client] peers.preferred=`
configuration option (:ref:`Client Configuration`), which suffers
from a similar disadvantage.
Grid Manager
------------
A "grid-manager" consists of some data defining a keypair (along with
some other details) and Tahoe sub-commands to manipulate the data and
produce certificates to give to storage-servers. Certificates assert
the statement: "Grid Manager X suggests you use storage-server Y to
upload shares to" (X and Y are public-keys). Such a certificate
consists of:
- the version of the format the certificate conforms to (`1`)
- the public-key of a storage-server
- an expiry timestamp
- a signature of the above
A client will always use any storage-server for downloads (expired
certificate, or no certificate) because clients check the ciphertext
and re-assembled plaintext against the keys in the capability;
"grid-manager" certificates only control uploads.
Clients make use of this functionality by configuring one or more Grid Manager public keys.
This tells the client to only upload to storage-servers that have a currently-valid certificate from any of the configured Grid Managers.
In case none are configured, the default behavior (of using any storage server) prevails.
Grid Manager Data Storage
-------------------------
The data defining the grid-manager is stored in an arbitrary
directory, which you indicate with the ``--config`` option (in the
future, we may add the ability to store the data directly in a grid,
at which time you may be able to pass a directory-capability to this
option).
If you don't want to store the configuration on disk at all, you may
use ``--config -`` (the last character is a dash) and write a valid
JSON configuration to stdin.
All commands require the ``--config`` option and they all behave
similarly for "data from stdin" versus "data from disk". A directory
(and not a file) is used on disk because in that mode, each
certificate issued is also stored alongside the configuration
document; in "stdin / stdout" mode, an issued certificate is only
ever available on stdout.
The configuration is a JSON document. It is subject to change as Grid
Manager evolves. It contains a version number in the
`grid_manager_config_version` key which will increment whenever the
document schema changes.
grid-manager create
```````````````````
Create a new grid-manager.
If you specify ``--config -`` then a new grid-manager configuration is
written to stdout. Otherwise, a new grid-manager is created in the
directory specified by the ``--config`` option. It is an error if the
directory already exists.
grid-manager public-identity
````````````````````````````
Print out a grid-manager's public key. This key is derived from the
private-key of the grid-manager, so a valid grid-manager config must
be given via ``--config``.
This public key is what is put in clients' configuration to actually
validate and use grid-manager certificates.
grid-manager add
````````````````
Takes two args: ``name pubkey``. The ``name`` is an arbitrary local
identifier for the new storage node (also sometimes called "a petname"
or "nickname"). The pubkey is the tahoe-encoded key from a ``node.pubkey``
file in the storage-server's node directory (minus any
whitespace). For example, if ``~/storage0`` contains a storage-node,
you might do something like this::
grid-manager --config ./gm0 add storage0 $(cat ~/storage0/node.pubkey)
This adds a new storage-server to a Grid Manager's
configuration. (Since it mutates the configuration, if you used
``--config -`` the new configuration will be printed to stdout). The
usefulness of the ``name`` is solely for reference within this Grid
Manager.
grid-manager list
`````````````````
Lists all storage-servers that have previously been added using
``grid-manager add``.
grid-manager sign
`````````````````
Takes two args: ``name expiry_days``. The ``name`` is a nickname used
previously in a ``grid-manager add`` command and ``expiry_days`` is
the number of days in the future when the certificate should expire.
Note that this mutates the state of the grid-manager if it is on disk,
by adding this certificate to our collection of issued
certificates. If you used ``--config -``, the certificate isn't
persisted anywhere except to stdout (so if you wish to keep it
somewhere, that is up to you).
This command creates a new "version 1" certificate for a
storage-server (identified by its public key). The new certificate is
printed to stdout. If you stored the config on disk, the new
certificate will (also) be in a file named like ``alice.cert.0``.
Enrolling a Storage Server: CLI
-------------------------------
tahoe admin add-grid-manager-cert
`````````````````````````````````
- `--filename`: the file to read the cert from
- `--name`: the name of this certificate
Import a "version 1" storage-certificate produced by a grid-manager. A
storage server may have zero or more such certificates installed; for
now just one is sufficient. You will have to re-start your node after
this. Subsequent announcements to the Introducer will include this
certificate.
.. note::
This command will simply edit the `tahoe.cfg` file and direct you
to re-start. In the Future(tm), we should consider (in exarkun's
words):
"A python program you run as a new process" might not be the
best abstraction to layer on top of the configuration
persistence system, though. It's a nice abstraction for users
(although most users would probably rather have a GUI) but it's
not a great abstraction for automation. So at some point it
may be better if there is CLI -> public API -> configuration
persistence system. And maybe "public API" is even a network
API for the storage server so it's equally easy to access from
an agent implemented in essentially any language and maybe if
the API is exposed by the storage node itself then this also
gives you live-configuration-updates, avoiding the need for
node restarts (not that this is the only way to accomplish
this, but I think it's a good way because it avoids the need
for messes like inotify and it supports the notion that the
storage node process is in charge of its own configuration
persistence system, not just one consumer among many ... which
has some nice things going for it ... though how this interacts
exactly with further node management automation might bear
closer scrutiny).
Enrolling a Storage Server: Config
----------------------------------
You may edit the ``[storage]`` section of the ``tahoe.cfg`` file to
turn on grid-management with ``grid_management = true``. You then must
also provide a ``[grid_manager_certificates]`` section in the
config-file which lists ``name = path/to/certificate`` pairs.
These certificate files are issued by the ``grid-manager sign``
command; these should be transmitted to the storage server operator
who includes them in the config for the storage server. Relative paths
are based from the node directory. Example::
[storage]
grid_management = true
[grid_manager_certificates]
default = example_grid.cert
This will cause us to give this certificate to any Introducers we
connect to (and subsequently, the Introducer will give the certificate
out to clients).
Enrolling a Client: Config
--------------------------
You may instruct a Tahoe client to use only storage servers from given
Grid Managers. If there are no such keys, any servers are used
(but see https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3979). If
there are one or more keys, the client will only upload to a storage
server that has a valid certificate (from any of the keys).
To specify public-keys, add a ``[grid_managers]`` section to the
config. This consists of ``name = value`` pairs where ``name`` is an
arbitrary name and ``value`` is a public-key of a Grid
Manager. Example::
[grid_managers]
example_grid = pub-v0-vqimc4s5eflwajttsofisp5st566dbq36xnpp4siz57ufdavpvlq
See also https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3507 which
proposes a command to edit the config.
Example Setup of a New Managed Grid
-----------------------------------
This example creates an actual grid, but it's all just on one machine
with different "node directories" and a separate tahoe process for
each node. Usually of course each storage server would be on a
separate computer.
Note that we use the ``daemonize`` command in the following but that's
only one way to handle "running a command in the background". You
could instead run commands that start with ``daemonize ...`` in their
own shell/terminal window or via something like ``systemd``.
We'll store our Grid Manager configuration on disk, in
``./gm0``. To initialize this directory::
grid-manager --config ./gm0 create
(If you already have a grid, you can :ref:`skip ahead <skip_ahead>`.)
First of all, create an Introducer. Note that we actually have to run
it briefly before it creates the "Introducer fURL" we want for the
next steps::
tahoe create-introducer --listen=tcp --port=5555 --location=tcp:localhost:5555 ./introducer
daemonize tahoe -d introducer run
Next, we attach a couple of storage nodes::
tahoe create-node --introducer $(cat introducer/private/introducer.furl) --nickname storage0 --webport 6001 --location tcp:localhost:6003 --port 6003 ./storage0
tahoe create-node --introducer $(cat introducer/private/introducer.furl) --nickname storage1 --webport 6101 --location tcp:localhost:6103 --port 6103 ./storage1
daemonize tahoe -d storage0 run
daemonize tahoe -d storage1 run
.. _skip_ahead:
We can now tell the Grid Manager about our new storage servers::
grid-manager --config ./gm0 add storage0 $(cat storage0/node.pubkey)
grid-manager --config ./gm0 add storage1 $(cat storage1/node.pubkey)
To produce a new certificate for each node (``sign`` takes the nickname
and the certificate lifetime, in days), we do this::
grid-manager --config ./gm0 sign storage0 1 > ./storage0/gridmanager.cert
grid-manager --config ./gm0 sign storage1 1 > ./storage1/gridmanager.cert
Now, we want our storage servers to actually announce these
certificates into the grid. We do this by adding some configuration
(in ``tahoe.cfg``)::
[storage]
grid_management = true
[grid_manager_certificates]
default = gridmanager.cert
Add the above bit to each node's ``tahoe.cfg`` and re-start the
storage nodes. (Alternatively, use the ``tahoe add-grid-manager``
command).
Now try adding a new storage server ``storage2``. This client can join
the grid just fine, and announce itself to the Introducer as providing
storage::
tahoe create-node --introducer $(cat introducer/private/introducer.furl) --nickname storage2 --webport 6301 --location tcp:localhost:6303 --port 6303 ./storage2
daemonize tahoe -d storage2 run
At this point any client will upload to any of these three
storage-servers. Make a client "alice" and try!
::
tahoe create-client --introducer $(cat introducer/private/introducer.furl) --nickname alice --webport 6401 --shares-total=3 --shares-needed=2 --shares-happy=3 ./alice
daemonize tahoe -d alice run
tahoe -d alice put README.rst # prints out a read-cap
find storage2/storage/shares # confirm storage2 has a share
Now we want to make Alice only upload to the storage servers that the
grid-manager has given certificates to (``storage0`` and
``storage1``). We need the grid-manager's public key to put in Alice's
configuration::
grid-manager --config ./gm0 public-identity
Put the key printed out above into Alice's ``tahoe.cfg`` in section
``client``::
[grid_managers]
example_name = pub-v0-vqimc4s5eflwajttsofisp5st566dbq36xnpp4siz57ufdavpvlq
Now, re-start the "alice" client. Since we made Alice's parameters
require 3 storage servers to be reachable (``--shares-happy=3``), all their
uploads should now fail (so ``tahoe put`` will fail) because they
won't use storage2 and thus can't "achieve happiness".
A proposal to expose more information about Grid Manager and
certificate status in the Welcome page is discussed in
https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3506


@@ -1,15 +1,6 @@
"""
Ported to Python 3.
"""
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
import sys
import shutil
from time import sleep
@@ -49,7 +40,6 @@ from .util import (
await_client_ready,
TahoeProcess,
cli,
_run_node,
generate_ssh_key,
block_with_timeout,
)
@@ -66,6 +56,29 @@ def pytest_addoption(parser):
"--coverage", action="store_true", dest="coverage",
help="Collect coverage statistics",
)
parser.addoption(
"--force-foolscap", action="store_true", default=False,
dest="force_foolscap",
help=("If set, force Foolscap only for the storage protocol. " +
"Otherwise HTTP will be used.")
)
parser.addoption(
"--runslow", action="store_true", default=False,
dest="runslow",
help="If set, run tests marked as slow.",
)
def pytest_collection_modifyitems(session, config, items):
if not config.option.runslow:
# The --runslow option was not given; keep only collected items not
# marked as slow.
items[:] = [
item
for item
in items
if item.get_closest_marker("slow") is None
]
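# A test opts in to this filtering with the standard pytest marker syntax
# (as integration/test_vectors.py does with @mark.slow); a minimal sketch:
#
#   import pytest
#
#   @pytest.mark.slow
#   def test_something_expensive():
#       ...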
@pytest.fixture(autouse=True, scope='session')
def eliot_logging():
@@ -410,10 +423,9 @@ alice-key ssh-rsa {ssh_public_key} {rwcap}
""".format(rwcap=rwcap, ssh_public_key=ssh_public_key))
# 4. Restart the node with new SFTP config.
process.kill()
pytest_twisted.blockon(_run_node(reactor, process.node_dir, request, None))
pytest_twisted.blockon(process.restart_async(reactor, request))
await_client_ready(process)
print(f"Alice pid: {process.transport.pid}")
return process


@@ -3,11 +3,14 @@ Integration tests for getting and putting files, including reading from stdin
and stdout.
"""
from subprocess import Popen, PIPE
from subprocess import Popen, PIPE, check_output
import sys
import pytest
from pytest_twisted import ensureDeferred
from twisted.internet import reactor
from .util import run_in_thread, cli
from .util import run_in_thread, cli, reconfigure
DATA = b"abc123 this is not utf-8 decodable \xff\x00\x33 \x11"
try:
@@ -62,3 +65,51 @@ def test_get_to_stdout(alice, get_put_alias, tmpdir):
)
assert p.stdout.read() == DATA
assert p.wait() == 0
@pytest.mark.skipif(
sys.platform.startswith("win"),
reason="reconfigure() has issues on Windows"
)
@ensureDeferred
async def test_upload_download_immutable_different_default_max_segment_size(alice, get_put_alias, tmpdir, request):
"""
Tahoe-LAFS used to have a default max segment size of 128KB; it is now
1MB. Test that an upload created when 128KB was the default can be
downloaded with 1MB as the default (i.e. old uploader, new downloader), and
vice versa (new uploader, old downloader).
"""
tempfile = tmpdir.join("file")
large_data = DATA * 100_000
assert len(large_data) > 2 * 1024 * 1024
with tempfile.open("wb") as f:
f.write(large_data)
async def set_segment_size(segment_size):
await reconfigure(
reactor,
request,
alice,
(1, 1, 1),
None,
max_segment_size=segment_size
)
# 1. Upload file 1 with default segment size set to 1MB
await set_segment_size(1024 * 1024)
cli(alice, "put", str(tempfile), "getput:seg1024kb")
# 2. Download file 1 with default segment size set to 128KB
await set_segment_size(128 * 1024)
assert large_data == check_output(
["tahoe", "--node-directory", alice.node_dir, "get", "getput:seg1024kb", "-"]
)
# 3. Upload file 2 with default segment size set to 128KB
cli(alice, "put", str(tempfile), "getput:seg128kb")
# 4. Download file 2 with default segment size set to 1MB
await set_segment_size(1024 * 1024)
assert large_data == check_output(
["tahoe", "--node-directory", alice.node_dir, "get", "getput:seg128kb", "-"]
)


@@ -55,9 +55,12 @@ def i2p_network(reactor, temp_dir, request):
proto,
which("docker"),
(
"docker", "run", "-p", "7656:7656", "purplei2p/i2pd:release-2.43.0",
"docker", "run", "-p", "7656:7656", "purplei2p/i2pd:release-2.45.1",
# Bad URL for reseeds, so it can't talk to other routers.
"--reseed.urls", "http://localhost:1/",
# Make sure we see the "ephemeral keys" message
"--log=stdout",
"--loglevel=info"
),
)

integration/test_vectors.py Normal file

@@ -0,0 +1,121 @@
"""
Verify certain results against test vectors with well-known results.
"""
from __future__ import annotations
from functools import partial
from typing import AsyncGenerator, Iterator
from itertools import starmap, product
from attrs import evolve
from pytest import mark
from pytest_twisted import ensureDeferred
from . import vectors
from .vectors import parameters
from .util import reconfigure, upload, TahoeProcess
@mark.parametrize('convergence', parameters.CONVERGENCE_SECRETS)
def test_convergence(convergence):
"""
Convergence secrets are 16 bytes.
"""
assert isinstance(convergence, bytes), "Convergence secret must be bytes"
assert len(convergence) == 16, "Convergence secret must be 16 bytes"
@mark.slow
@mark.parametrize('case,expected', vectors.capabilities.items())
@ensureDeferred
async def test_capability(reactor, request, alice, case, expected):
"""
The capability that results from uploading certain well-known data
with certain well-known parameters results in exactly the previously
computed value.
"""
# rewrite alice's config to match params and convergence
await reconfigure(
reactor, request, alice, (1, case.params.required, case.params.total), case.convergence, case.segment_size)
# upload data in the correct format
actual = upload(alice, case.fmt, case.data)
# compare the resulting cap to the expected result
assert actual == expected
@ensureDeferred
async def skiptest_generate(reactor, request, alice):
"""
This is a helper for generating the test vectors.
You can re-generate the test vectors by fixing the name of the test and
running it. Normally this test doesn't run because it ran once and we
captured its output. Other tests run against that output and we want them
to run against the results produced originally, not a possibly
ever-changing set of outputs.
"""
space = starmap(
# segment_size could be a parameter someday but it's not easy to vary
# using the Python implementation so it isn't one for now.
partial(vectors.Case, segment_size=parameters.SEGMENT_SIZE),
product(
parameters.ZFEC_PARAMS,
parameters.CONVERGENCE_SECRETS,
parameters.OBJECT_DESCRIPTIONS,
parameters.FORMATS,
),
)
iterresults = generate(reactor, request, alice, space)
results = []
async for result in iterresults:
# Accumulate the new result
results.append(result)
# Then rewrite the whole output file with the new accumulator value.
# This means that if we fail partway through, we will still have
# recorded partial results -- instead of losing them all.
vectors.save_capabilities(results)
async def generate(
reactor,
request,
alice: TahoeProcess,
cases: Iterator[vectors.Case],
) -> AsyncGenerator[tuple[vectors.Case, str], None]:
"""
Generate all of the test vectors using the given node.
:param reactor: The reactor to use to restart the Tahoe-LAFS node when it
needs to be reconfigured.
:param request: The pytest request object to use to arrange process
cleanup.
:param alice: The Tahoe-LAFS node to use to generate the test vectors.
:param cases: The inputs for which to generate values.
:return: An async generator of (case, capability) pairs.
"""
# Share placement doesn't affect the resulting capability. For maximum
# reliability of this generator, be happy if we can put shares anywhere
happy = 1
for case in cases:
await reconfigure(
reactor,
request,
alice,
(happy, case.params.required, case.params.total),
case.convergence,
case.segment_size
)
# Give the format a chance to make an RSA key if it needs it.
case = evolve(case, fmt=case.fmt.customize())
cap = upload(alice, case.fmt, case.data)
yield case, cap


@@ -7,18 +7,9 @@ Most of the tests have cursory asserts and encode 'what the WebAPI did
at the time of testing' -- not necessarily a cohesive idea of what the
WebAPI *should* do in every situation. It's not clear the latter
exists anywhere, however.
Ported to Python 3.
"""
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from __future__ import annotations
import time
from urllib.parse import unquote as url_unquote, quote as url_quote
@@ -32,6 +23,7 @@ import requests
import html5lib
from bs4 import BeautifulSoup
from pytest_twisted import ensureDeferred
def test_index(alice):
"""
@@ -252,10 +244,18 @@ def test_status(alice):
assert found_download, "Failed to find the file we downloaded in the status-page"
def test_directory_deep_check(alice):
@ensureDeferred
async def test_directory_deep_check(reactor, request, alice):
"""
use deep-check and confirm the result pages work
"""
# Make sure the node is configured compatibly with expectations of this
# test.
happy = 3
required = 2
total = 4
await util.reconfigure(reactor, request, alice, (happy, required, total), convergence=None)
# create a directory
resp = requests.post(
@@ -313,7 +313,7 @@ def test_directory_deep_check(alice):
)
def check_repair_data(checkdata):
assert checkdata["healthy"] is True
assert checkdata["healthy"]
assert checkdata["count-happiness"] == 4
assert checkdata["count-good-share-hosts"] == 4
assert checkdata["count-shares-good"] == 4


@@ -1,22 +1,19 @@
"""
Ported to Python 3.
General functionality useful for the implementation of integration tests.
"""
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from __future__ import annotations
from contextlib import contextmanager
from typing import Any
from typing_extensions import Literal
from tempfile import NamedTemporaryFile
import sys
import time
import json
from os import mkdir, environ
from os.path import exists, join
from io import StringIO, BytesIO
from functools import partial
from subprocess import check_output
from twisted.python.filepath import (
@@ -26,18 +23,30 @@ from twisted.internet.defer import Deferred, succeed
from twisted.internet.protocol import ProcessProtocol
from twisted.internet.error import ProcessExitedAlready, ProcessDone
from twisted.internet.threads import deferToThread
from twisted.internet.interfaces import IProcessTransport, IReactorProcess
from attrs import frozen, evolve
import requests
from cryptography.hazmat.primitives.asymmetric import rsa
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.serialization import (
Encoding,
PrivateFormat,
NoEncryption,
)
from paramiko.rsakey import RSAKey
from boltons.funcutils import wraps
from allmydata.util import base32
from allmydata.util.configutil import (
get_config,
set_config,
write_config,
)
from allmydata import client
from allmydata.interfaces import DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
import pytest_twisted
@ -142,9 +151,40 @@ class _MagicTextProtocol(ProcessProtocol):
sys.stdout.write(data)
def _cleanup_tahoe_process(tahoe_transport, exited):
def _cleanup_process_async(transport: IProcessTransport, allow_missing: bool) -> None:
"""
Terminate the given process with a kill signal (SIGKILL on POSIX,
If the given process transport seems to still be associated with a
running process, send a SIGTERM to that process.
:param transport: The transport to use.
:param allow_missing: If ``True`` then it is not an error for the
transport to have no associated process. Otherwise, an exception will
be raised in that case.
:raise: ``ValueError`` if ``allow_missing`` is ``False`` and the transport
has no process.
"""
if transport.pid is None:
if allow_missing:
print("Process already cleaned up and that's okay.")
return
else:
raise ValueError("Process is not running")
print("signaling {} with TERM".format(transport.pid))
try:
transport.signalProcess('TERM')
except ProcessExitedAlready:
# The transport object thought it still had a process but the real OS
# process has already exited. That's fine. We accomplished what we
# wanted to. We don't care about ``allow_missing`` here because
# there's no way we could have known the real OS process already
# exited.
pass
def _cleanup_tahoe_process(tahoe_transport, exited, allow_missing=False):
"""
Terminate the given process with a kill signal (SIGTERM on POSIX,
TerminateProcess on Windows).
:param tahoe_transport: The `IProcessTransport` representing the process.
@ -153,14 +193,10 @@ def _cleanup_tahoe_process(tahoe_transport, exited):
:return: After the process has exited.
"""
from twisted.internet import reactor
try:
print("signaling {} with TERM".format(tahoe_transport.pid))
tahoe_transport.signalProcess('TERM')
print("signaled, blocking on exit")
block_with_timeout(exited, reactor)
print("exited, goodbye")
except ProcessExitedAlready:
pass
_cleanup_process_async(tahoe_transport, allow_missing=allow_missing)
print("signaled, blocking on exit")
block_with_timeout(exited, reactor)
print("exited, goodbye")
def _tahoe_runner_optional_coverage(proto, reactor, request, other_args):
@ -207,8 +243,33 @@ class TahoeProcess(object):
def kill(self):
"""Kill the process, block until it's done."""
print(f"TahoeProcess.kill({self.transport.pid} / {self.node_dir})")
_cleanup_tahoe_process(self.transport, self.transport.exited)
def kill_async(self):
"""
Kill the process, return a Deferred that fires when it's done.
"""
print(f"TahoeProcess.kill_async({self.transport.pid} / {self.node_dir})")
_cleanup_process_async(self.transport, allow_missing=False)
return self.transport.exited
def restart_async(self, reactor: IReactorProcess, request: Any) -> Deferred:
"""
Stop and then re-start the associated process.
:return: A Deferred that fires after the new process is ready to
handle requests.
"""
d = self.kill_async()
d.addCallback(lambda ignored: _run_node(reactor, self.node_dir, request, None, finalize=False))
def got_new_process(proc):
# Grab the new transport since the one we had before is no longer
# valid after the stop/start cycle.
self._process_transport = proc.transport
d.addCallback(got_new_process)
return d
def __str__(self):
return "<TahoeProcess in '{}'>".format(self._node_dir)
@ -237,19 +298,17 @@ def _run_node(reactor, node_dir, request, magic_text, finalize=True):
)
transport.exited = protocol.exited
tahoe_process = TahoeProcess(
transport,
node_dir,
)
if finalize:
request.addfinalizer(partial(_cleanup_tahoe_process, transport, protocol.exited))
request.addfinalizer(tahoe_process.kill)
# XXX abusing the Deferred; should use .when_magic_seen() pattern
def got_proto(proto):
transport._protocol = proto
return TahoeProcess(
transport,
node_dir,
)
protocol.magic_seen.addCallback(got_proto)
return protocol.magic_seen
d = protocol.magic_seen
d.addCallback(lambda ignored: tahoe_process)
return d
def _create_node(reactor, request, temp_dir, introducer_furl, flog_gatherer, name, web_port,
@ -300,6 +359,20 @@ def _create_node(reactor, request, temp_dir, introducer_furl, flog_gatherer, nam
u'log_gatherer.furl',
flog_gatherer,
)
force_foolscap = request.config.getoption("force_foolscap")
assert force_foolscap in (True, False)
set_config(
config,
'storage',
'force_foolscap',
str(force_foolscap),
)
set_config(
config,
'client',
'force_foolscap',
str(force_foolscap),
)
write_config(FilePath(config_path), config)
created_d.addCallback(created)
@ -572,3 +645,178 @@ def run_in_thread(f):
def test(*args, **kwargs):
return deferToThread(lambda: f(*args, **kwargs))
return test
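# Hypothetical usage sketch ("fetch_status" and its URL parameter are
# invented): wrap a blocking call so it runs in a thread and hands the
# caller a Deferred instead of blocking the reactor.
@run_in_thread
def fetch_status(url):
    return requests.get(url).status_code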
@frozen
class CHK:
"""
Represent the CHK encoding sufficiently to run a ``tahoe put`` command
using it.
"""
kind = "chk"
max_shares = 256
def customize(self) -> CHK:
# Nothing to do.
return self
@classmethod
def load(cls, params: None) -> CHK:
assert params is None
return cls()
def to_json(self) -> None:
return None
@contextmanager
def to_argv(self) -> None:
yield []
@frozen
class SSK:
"""
Represent the SSK encodings (SDMF and MDMF) sufficiently to run a
``tahoe put`` command using one of them.
"""
kind = "ssk"
# SDMF and MDMF encode share counts (N and k) into the share itself as an
# unsigned byte. They could have encoded (share count - 1) to fit the
# full range supported by ZFEC into the unsigned byte - but they don't.
# So 256 is inaccessible to those formats and we set the upper bound at
# 255.
max_shares = 255
name: Literal["sdmf", "mdmf"]
key: None | bytes
@classmethod
def load(cls, params: dict) -> SSK:
assert params.keys() == {"format", "mutable", "key"}
return cls(params["format"], params["key"].encode("ascii"))
def customize(self) -> SSK:
"""
Return an SSK with a newly generated random RSA key.
"""
return evolve(self, key=generate_rsa_key())
def to_json(self) -> dict[str, str | None]:
return {
"format": self.name,
"mutable": None,
"key": self.key.decode("ascii"),
}
@contextmanager
def to_argv(self) -> None:
with NamedTemporaryFile() as f:
f.write(self.key)
f.flush()
yield [f"--format={self.name}", "--mutable", f"--private-key-path={f.name}"]
def upload(alice: TahoeProcess, fmt: CHK | SSK, data: bytes) -> str:
"""
Upload the given data to the given node.
:param alice: The node to upload to.
:param fmt: The format for the upload: a CHK or SSK instance (i.e. CHK, SDMF, or MDMF).
:param data: The data to upload.
:return: The capability for the uploaded data.
"""
with NamedTemporaryFile() as f:
f.write(data)
f.flush()
with fmt.to_argv() as fmt_argv:
argv = [alice, "put"] + fmt_argv + [f.name]
return cli(*argv).decode("utf-8").strip()
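# Hypothetical usage sketch ("alice" is assumed to be a running
# TahoeProcess fixture; the data is invented). The same bytes can be
# uploaded as an immutable CHK or as a mutable MDMF SSK:
chk_cap = upload(alice, CHK(), b"x" * 1024)
mdmf_cap = upload(alice, SSK(name="mdmf", key=None).customize(), b"x" * 1024)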
async def reconfigure(reactor, request, node: TahoeProcess,
params: tuple[int, int, int],
convergence: None | bytes,
max_segment_size: None | int = None) -> None:
"""
Reconfigure a Tahoe-LAFS node with different ZFEC parameters and
convergence secret.
TODO This appears to have issues on Windows.
If the current configuration is different from the specified
configuration, the node will be restarted so it takes effect.
:param reactor: A reactor to use to restart the process.
:param request: The pytest request object to use to arrange process
cleanup.
:param node: The Tahoe-LAFS node to reconfigure.
:param params: The ``happy``, ``needed``, and ``total`` ZFEC encoding
parameters.
:param convergence: If given, the convergence secret. If not given, the
existing convergence secret will be left alone.
:return: ``None`` after the node configuration has been rewritten, the
node has been restarted, and the node is ready to provide service.
"""
happy, needed, total = params
config = node.get_config()
changed = False
cur_happy = int(config.get_config("client", "shares.happy"))
cur_needed = int(config.get_config("client", "shares.needed"))
cur_total = int(config.get_config("client", "shares.total"))
if (happy, needed, total) != (cur_happy, cur_needed, cur_total):
changed = True
config.set_config("client", "shares.happy", str(happy))
config.set_config("client", "shares.needed", str(needed))
config.set_config("client", "shares.total", str(total))
if convergence is not None:
cur_convergence = config.get_private_config("convergence").encode("ascii")
if base32.a2b(cur_convergence) != convergence:
changed = True
config.write_private_config("convergence", base32.b2a(convergence))
if max_segment_size is not None:
cur_segment_size = int(config.get_config("client", "shares._max_immutable_segment_size_for_testing", DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE))
if cur_segment_size != max_segment_size:
changed = True
config.set_config(
"client",
"shares._max_immutable_segment_size_for_testing",
str(max_segment_size)
)
if changed:
# TODO reconfigure() seems to have issues on Windows. If you need to
# use it there, delete this assert and try to figure out what's going
# on...
assert not sys.platform.startswith("win")
# restart the node
print(f"Restarting {node.node_dir} for ZFEC reconfiguration")
await node.restart_async(reactor, request)
print("Restarted. Waiting for ready state.")
await_client_ready(node)
print("Ready.")
else:
print("Config unchanged, not restarting.")
def generate_rsa_key() -> bytes:
"""
Generate a 2048 bit RSA key suitable for use with SSKs.
"""
return rsa.generate_private_key(
public_exponent=65537,
key_size=2048,
backend=default_backend()
).private_bytes(
encoding=Encoding.PEM,
format=PrivateFormat.TraditionalOpenSSL,
encryption_algorithm=NoEncryption(),
)

View File

@ -0,0 +1,30 @@
__all__ = [
"DATA_PATH",
"CURRENT_VERSION",
"MAX_SHARES",
"Case",
"Sample",
"SeedParam",
"encode_bytes",
"save_capabilities",
"capabilities",
]
from .vectors import (
DATA_PATH,
CURRENT_VERSION,
Case,
Sample,
SeedParam,
encode_bytes,
save_capabilities,
capabilities,
)
from .parameters import (
MAX_SHARES,
)

View File

@ -0,0 +1,58 @@
"""
Simple data type definitions useful in the definition/verification of test
vectors.
"""
from __future__ import annotations
from attrs import frozen
# CHK have a max of 256 shares. SDMF / MDMF have a max of 255 shares!
# Represent max symbolically and resolve it when we know what format we're
# dealing with.
MAX_SHARES = "max"
@frozen
class Sample:
"""
Some instructions for building a long byte string.
:ivar seed: Some bytes to repeat some times to produce the string.
:ivar length: The length of the desired byte string.
"""
seed: bytes
length: int
@frozen
class Param:
"""
Some ZFEC parameters.
"""
required: int
total: int
@frozen
class SeedParam:
"""
Some ZFEC parameters, almost.
:ivar required: The number of required shares.
:ivar total: Either the number of total shares or the constant
``MAX_SHARES`` to indicate that the total number of shares should be
the maximum number supported by the object format.
"""
required: int
total: int | str
def realize(self, max_total: int) -> Param:
"""
Create a ``Param`` from this object's values, possibly
substituting the given real value for total if necessary.
:param max_total: The value to use to replace ``MAX_SHARES`` if
necessary.
"""
if self.total == MAX_SHARES:
return Param(self.required, max_total)
return Param(self.required, self.total)
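# Worked example (not from the source): the symbolic total resolves to
# whatever maximum the chosen format supports, e.g. 256 for CHK and 255
# for SSK, while concrete totals pass through unchanged.
assert SeedParam(101, MAX_SHARES).realize(256) == Param(101, 256)
assert SeedParam(101, MAX_SHARES).realize(255) == Param(101, 255)
assert SeedParam(2, 3).realize(256) == Param(2, 3)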

View File

@ -0,0 +1,93 @@
"""
Define input parameters for test vector generation.
:ivar CONVERGENCE_SECRETS: Convergence secrets.
:ivar SEGMENT_SIZE: The single segment size that the Python implementation
currently supports without a lot of refactoring.
:ivar OBJECT_DESCRIPTIONS: Small objects with instructions which can be
expanded into a possibly large byte string. These are intended to be used
as plaintext inputs.
:ivar ZFEC_PARAMS: Input parameters to ZFEC.
:ivar FORMATS: Encoding/encryption formats.
"""
from __future__ import annotations
from hashlib import sha256
from .model import MAX_SHARES
from .vectors import Sample, SeedParam
from ..util import CHK, SSK
def digest(bs: bytes) -> bytes:
"""
Digest bytes to bytes.
"""
return sha256(bs).digest()
def hexdigest(bs: bytes) -> str:
"""
Digest bytes to text.
"""
return sha256(bs).hexdigest()
# Just a couple convergence secrets. The only thing we do with this value is
# feed it into a tagged hash. It certainly makes a difference to the output
# but the hash should destroy any structure in the input so it doesn't seem
# like there's a reason to test a lot of different values.
CONVERGENCE_SECRETS: list[bytes] = [
b"aaaaaaaaaaaaaaaa",
digest(b"Hello world")[:16],
]
SEGMENT_SIZE: int = 128 * 1024
# Exercise at least a handful of different sizes, trying to cover:
#
# 1. Some cases smaller than one "segment" (128k).
# This covers shrinking of some parameters to match data size.
# This includes one case of the smallest possible CHK.
#
# 2. Some cases right on the edges of integer segment multiples.
# Because boundaries are tricky.
#
# 3. Some cases that involve quite a few segments.
# This exercises merkle tree construction more thoroughly.
#
# See ``stretch`` for construction of the actual test data.
OBJECT_DESCRIPTIONS: list[Sample] = [
# The smallest possible. 55 bytes and smaller are LIT.
Sample(b"a", 56),
Sample(b"a", 1024),
Sample(b"c", 4096),
Sample(digest(b"foo"), SEGMENT_SIZE - 1),
Sample(digest(b"bar"), SEGMENT_SIZE + 1),
Sample(digest(b"baz"), SEGMENT_SIZE * 16 - 1),
Sample(digest(b"quux"), SEGMENT_SIZE * 16 + 1),
Sample(digest(b"bazquux"), SEGMENT_SIZE * 32),
Sample(digest(b"foobar"), SEGMENT_SIZE * 64 - 1),
Sample(digest(b"barbaz"), SEGMENT_SIZE * 64 + 1),
]
ZFEC_PARAMS: list[SeedParam] = [
SeedParam(1, 1),
SeedParam(1, 3),
SeedParam(2, 3),
SeedParam(3, 10),
SeedParam(71, 255),
SeedParam(101, MAX_SHARES),
]
FORMATS: list[CHK | SSK] = [
CHK(),
# These start out unaware of a key but various keys will be supplied
# during generation.
SSK(name="sdmf", key=None),
SSK(name="mdmf", key=None),
]
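# Illustrative sketch (an assumption about the generator, which lives
# elsewhere): if one Case is produced per element of the cross product
# of these inputs, the total number of cases is the product of the list
# lengths.
from itertools import product
cases = list(product(ZFEC_PARAMS, CONVERGENCE_SECRETS, OBJECT_DESCRIPTIONS, FORMATS))
assert len(cases) == 6 * 2 * 10 * 3  # 360 combinations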

File diff suppressed because it is too large

View File

@ -0,0 +1,155 @@
"""
A module that loads pre-generated test vectors.
:ivar DATA_PATH: The path of the file containing test vectors.
:ivar capabilities: The capability test vectors.
"""
from __future__ import annotations
from typing import TextIO
from attrs import frozen
from yaml import safe_load, safe_dump
from base64 import b64encode, b64decode
from twisted.python.filepath import FilePath
from .model import Param, Sample, SeedParam
from ..util import CHK, SSK
DATA_PATH: FilePath = FilePath(__file__).sibling("test_vectors.yaml")
# The version of the persisted test vector data this code can interpret.
CURRENT_VERSION: str = "2023-01-16.2"
@frozen
class Case:
"""
Represent one case for which we want/have a test vector.
"""
seed_params: Param
convergence: bytes
seed_data: Sample
fmt: CHK | SSK
segment_size: int
@property
def data(self):
return stretch(self.seed_data.seed, self.seed_data.length)
@property
def params(self):
return self.seed_params.realize(self.fmt.max_shares)
def encode_bytes(b: bytes) -> str:
"""
Base64 encode some bytes to text so they are representable in JSON.
"""
return b64encode(b).decode("ascii")
def decode_bytes(b: str) -> bytes:
"""
Base64 decode some text to bytes.
"""
return b64decode(b.encode("ascii"))
def stretch(seed: bytes, size: int) -> bytes:
"""
Given a simple description of a byte string, return the byte string
itself.
"""
assert isinstance(seed, bytes)
assert isinstance(size, int)
assert size > 0
assert len(seed) > 0
multiples = size // len(seed) + 1
return (seed * multiples)[:size]
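# Worked example (not from the source) of the repeat-then-truncate rule:
assert stretch(b"ab", 5) == b"ababa"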
def save_capabilities(results: list[tuple[Case, str]], path: FilePath = DATA_PATH) -> None:
"""
Save some test vector cases and their expected values.
This is logically the inverse of ``load_capabilities``.
"""
path.setContent(safe_dump({
"version": CURRENT_VERSION,
"vector": [
{
"convergence": encode_bytes(case.convergence),
"format": {
"kind": case.fmt.kind,
"params": case.fmt.to_json(),
},
"sample": {
"seed": encode_bytes(case.seed_data.seed),
"length": case.seed_data.length,
},
"zfec": {
"segmentSize": case.segment_size,
"required": case.params.required,
"total": case.params.total,
},
"expected": cap,
}
for (case, cap)
in results
],
}).encode("ascii"))
def load_format(serialized: dict) -> CHK | SSK:
"""
Load an encrypted object format from a simple description of it.
:param serialized: A ``dict`` describing either CHK or SSK, possibly with
some parameters.
"""
if serialized["kind"] == "chk":
return CHK.load(serialized["params"])
elif serialized["kind"] == "ssk":
return SSK.load(serialized["params"])
else:
raise ValueError(f"Unrecognized format: {serialized}")
def load_capabilities(f: TextIO) -> dict[Case, str]:
"""
Load some test vector cases and their expected results from the given
file.
This is logically the inverse of ``save_capabilities``.
"""
data = safe_load(f)
if data is None:
return {}
if data["version"] != CURRENT_VERSION:
print(
f"Current version is {CURRENT_VERSION}; "
f"cannot load version {data['version']} data."
)
return {}
return {
Case(
seed_params=SeedParam(case["zfec"]["required"], case["zfec"]["total"]),
segment_size=case["zfec"]["segmentSize"],
convergence=decode_bytes(case["convergence"]),
seed_data=Sample(decode_bytes(case["sample"]["seed"]), case["sample"]["length"]),
fmt=load_format(case["format"]),
): case["expected"]
for case
in data["vector"]
}
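# Hypothetical round-trip sketch (the Case values and the temporary
# path are invented; not part of this module):
case = Case(
    seed_params=SeedParam(2, 3),
    convergence=b"aaaaaaaaaaaaaaaa",
    seed_data=Sample(b"a", 1024),
    fmt=CHK(),
    segment_size=128 * 1024,
)
tmp = FilePath("test_vectors.tmp.yaml")
save_capabilities([(case, "URI:CHK:example")], path=tmp)
with tmp.open() as f:
    assert load_capabilities(f)[case] == "URI:CHK:example"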
try:
with DATA_PATH.open() as f:
capabilities: dict[Case, str] = load_capabilities(f)
except FileNotFoundError:
capabilities = {}

View File

@ -5,7 +5,7 @@ from __future__ import print_function
import sys, math
from allmydata import uri, storage
from allmydata.immutable import upload
from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE
from allmydata.interfaces import DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
from allmydata.util import mathutil
def roundup(size, blocksize=4096):
@ -26,7 +26,7 @@ class BigFakeString(object):
def tell(self):
return self.fp
def calc(filesize, params=(3,7,10), segsize=DEFAULT_MAX_SEGMENT_SIZE):
def calc(filesize, params=(3,7,10), segsize=DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE):
num_shares = params[2]
if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD:
urisize = len(uri.LiteralFileURI("A"*filesize).to_string())

View File

@ -0,0 +1,36 @@
"""
Writing to non-blocking pipe can result in ENOSPC when using Unix APIs on
Windows. So, this program passes through data from stdin to stdout, using
Windows APIs instead of Unix-y APIs.
"""
from twisted.internet.stdio import StandardIO
from twisted.internet import reactor
from twisted.internet.protocol import Protocol
from twisted.internet.interfaces import IHalfCloseableProtocol
from twisted.internet.error import ReactorNotRunning
from zope.interface import implementer
@implementer(IHalfCloseableProtocol)
class Passthrough(Protocol):
def readConnectionLost(self):
self.transport.loseConnection()
def writeConnectionLost(self):
try:
reactor.stop()
except ReactorNotRunning:
pass
def dataReceived(self, data):
self.transport.write(data)
def connectionLost(self, reason):
try:
reactor.stop()
except ReactorNotRunning:
pass
std = StandardIO(Passthrough())
reactor.run()

View File

@ -1,3 +1,10 @@
[mypy]
ignore_missing_imports = True
plugins=mypy_zope:plugin
show_column_numbers = True
pretty = True
show_error_codes = True
warn_unused_configs =True
no_implicit_optional = True
warn_redundant_casts = True
strict_equality = True

View File

@ -0,0 +1 @@
Tahoe-LAFS now includes a new "Grid Manager" specification and implementation adding more options to control which storage servers a client will use for uploads.

0
newsfragments/3870.minor Normal file
View File

0
newsfragments/3874.minor Normal file
View File

0
newsfragments/3914.minor Normal file
View File

View File

@ -0,0 +1,5 @@
`tahoe run ...` will now exit when its stdin is closed.
This facilitates subprocess management, specifically cleanup.
When a parent process that is running tahoe exits without time to do "proper" cleanup, at least the stdin descriptor will be closed.
"tahoe run" then notices this and exits.

0
newsfragments/3936.minor Normal file
View File

0
newsfragments/3937.minor Normal file
View File

View File

@ -0,0 +1 @@
Uploading immutables will now better use available bandwidth, which should allow for faster uploads in many cases.

0
newsfragments/3940.minor Normal file
View File

1
newsfragments/3942.minor Normal file
View File

@ -0,0 +1 @@

0
newsfragments/3944.minor Normal file
View File

View File

@ -0,0 +1 @@
Downloads of large immutables should now finish much faster.

0
newsfragments/3947.minor Normal file
View File

0
newsfragments/3950.minor Normal file
View File

0
newsfragments/3952.minor Normal file
View File

0
newsfragments/3953.minor Normal file
View File

0
newsfragments/3954.minor Normal file
View File

0
newsfragments/3956.minor Normal file
View File

0
newsfragments/3958.minor Normal file
View File

0
newsfragments/3960.minor Normal file
View File

1
newsfragments/3961.other Normal file
View File

@ -0,0 +1 @@
The integration test suite now includes a set of capability test vectors (``integration/vectors/test_vectors.yaml``) which can be used to verify compatibility between Tahoe-LAFS and other implementations.

View File

@ -0,0 +1 @@
Mutable objects can now be created with a pre-determined "signature key" using the ``tahoe put`` CLI or the HTTP API. This enables deterministic creation of mutable capabilities. This feature must be used with care to preserve the normal security and reliability properties.

View File

@ -0,0 +1 @@
Python 3.7 is no longer supported, and Debian 10 and Ubuntu 18.04 are no longer tested.

0
newsfragments/3965.minor Normal file
View File

View File

@ -0,0 +1 @@
Fix incompatibility with transitive dependency charset_normalizer >= 3 when using PyInstaller.

0
newsfragments/3967.minor Normal file
View File

0
newsfragments/3968.minor Normal file
View File

0
newsfragments/3969.minor Normal file
View File

1
newsfragments/3971.minor Normal file
View File

@ -0,0 +1 @@
Changes made to mypy.ini to make mypy more 'strict' and prevent future regressions.

0
newsfragments/3974.minor Normal file
View File

1
newsfragments/3975.minor Normal file
View File

@ -0,0 +1 @@
Fixes truthy conditional in status.py

1
newsfragments/3976.minor Normal file
View File

@ -0,0 +1 @@
Fixes a variable name that shadowed a built-in type.

View File

@ -0,0 +1 @@
Added support for Python 3.11.

View File

@ -1,14 +1,14 @@
{
"mach-nix": {
"branch": "master",
"branch": "switch-to-nix-pypi-fetcher-2",
"description": "Create highly reproducible python environments",
"homepage": "",
"owner": "davhau",
"owner": "PrivateStorageio",
"repo": "mach-nix",
"rev": "bdc97ba6b2ecd045a467b008cff4ae337b6a7a6b",
"sha256": "12b3jc0g0ak6s93g3ifvdpwxbyqx276k1kl66bpwz8a67qjbcbwf",
"rev": "f6d1a1841d8778c199326f95d0703c16bee2f8c4",
"sha256": "0krc4yhnpbzc4yhja9frnmym2vqm5zyacjnqb3fq9z9gav8vs9ls",
"type": "tarball",
"url": "https://github.com/davhau/mach-nix/archive/bdc97ba6b2ecd045a467b008cff4ae337b6a7a6b.tar.gz",
"url": "https://github.com/PrivateStorageio/mach-nix/archive/f6d1a1841d8778c199326f95d0703c16bee2f8c4.tar.gz",
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
},
"niv": {
@ -53,10 +53,10 @@
"homepage": "",
"owner": "DavHau",
"repo": "pypi-deps-db",
"rev": "5fe7d2d1c85cd86d64f4f079eef3f1ff5653bcd6",
"sha256": "0pc6mj7rzvmhh303rvj5wf4hrksm4h2rf4fsvqs0ljjdmgxrqm3f",
"rev": "5440c9c76f6431f300fb6a1ecae762a5444de5f6",
"sha256": "08r3iiaxzw9v2gq15y1m9bwajshyyz9280g6aia7mkgnjs9hnd1n",
"type": "tarball",
"url": "https://github.com/DavHau/pypi-deps-db/archive/5fe7d2d1c85cd86d64f4f079eef3f1ff5653bcd6.tar.gz",
"url": "https://github.com/DavHau/pypi-deps-db/archive/5440c9c76f6431f300fb6a1ecae762a5444de5f6.tar.gz",
"url_template": "https://github.com/<owner>/<repo>/archive/<rev>.tar.gz"
}
}

View File

@ -36,6 +36,7 @@ hidden_imports = [
'allmydata.stats',
'base64',
'cffi',
'charset_normalizer.md__mypyc',
'collections',
'commands',
'Crypto',

3
pytest.ini Normal file
View File

@ -0,0 +1,3 @@
[pytest]
markers =
slow: marks tests as slow (not run by default; run them with '--runslow')

View File

@ -55,7 +55,9 @@ install_requires = [
# * foolscap >= 0.12.6 has an i2p.sam_endpoint() that takes kwargs
# * foolscap 0.13.2 drops i2p support completely
# * foolscap >= 21.7 is necessary for Python 3 with i2p support.
# * foolscap >= 23.3 is necessary for Python 3.11.
"foolscap >= 21.7.0",
"foolscap >= 23.3.0; python_version > '3.10'",
# * cryptography 2.6 introduced some ed25519 APIs we rely on. Note that
# Twisted[conch] also depends on cryptography and Twisted[tls]
@ -96,7 +98,9 @@ install_requires = [
# an sftp extra in Tahoe-LAFS, there is no point in having one.
# * Twisted 19.10 introduces Site.getContentFile which we use to get
# temporary upload files placed into a per-node temporary directory.
"Twisted[tls,conch] >= 19.10.0",
# * Twisted 22.8.0 added support for coroutine-returning functions in many
# places (mainly via `maybeDeferred`)
"Twisted[tls,conch] >= 22.8.0",
"PyYAML >= 3.11",
@ -137,7 +141,13 @@ install_requires = [
"werkzeug != 2.2.0",
"treq",
"cbor2",
"pycddl >= 0.2",
# 0.4 adds the ability to pass in mmap() values which greatly reduces the
# amount of copying involved.
"pycddl >= 0.4",
# Command-line parsing
"click >= 7.0",
# for pid-file support
"psutil",
@ -221,7 +231,7 @@ def run_command(args, cwd=None):
use_shell = sys.platform == "win32"
try:
p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd, shell=use_shell)
except EnvironmentError as e: # if this gives a SyntaxError, note that Tahoe-LAFS requires Python 3.7+
except EnvironmentError as e: # if this gives a SyntaxError, note that Tahoe-LAFS requires Python 3.8+
print("Warning: unable to run %r." % (" ".join(args),))
print(e)
return None
@ -372,8 +382,8 @@ setup(name="tahoe-lafs", # also set in __init__.py
package_dir = {'':'src'},
packages=find_packages('src') + ['allmydata.test.plugins'],
classifiers=trove_classifiers,
# We support Python 3.7 or later. 3.11 is not supported yet.
python_requires=">=3.7, <3.11",
# We support Python 3.8 or later; 3.12 is untested for now
python_requires=">=3.8, <3.12",
install_requires=install_requires,
extras_require={
# Duplicate the Twisted pywin32 dependency here. See
@ -386,9 +396,6 @@ setup(name="tahoe-lafs", # also set in __init__.py
],
"test": [
"flake8",
# On Python 3.7, importlib_metadata v5 breaks flake8.
# https://github.com/python/importlib_metadata/issues/407
"importlib_metadata<5; python_version < '3.8'",
# Pin a specific pyflakes so we don't have different folks
# disagreeing on what is or is not a lint issue. We can bump
# this version from time to time, but we will do it
@ -396,7 +403,7 @@ setup(name="tahoe-lafs", # also set in __init__.py
"pyflakes == 2.2.0",
"coverage ~= 5.0",
"mock",
"tox",
"tox ~= 3.0",
"pytest",
"pytest-twisted",
"hypothesis >= 3.6.1",
@ -426,6 +433,11 @@ setup(name="tahoe-lafs", # also set in __init__.py
},
include_package_data=True,
setup_requires=setup_requires,
entry_points = { 'console_scripts': [ 'tahoe = allmydata.scripts.runner:run' ] },
entry_points={
'console_scripts': [
'tahoe = allmydata.scripts.runner:run',
'grid-manager = allmydata.cli.grid_manager:grid_manager',
]
},
**setup_args
)

View File

View File

@ -0,0 +1,224 @@
"""
A CLI for configuring a grid manager.
"""
from typing import Optional
from datetime import (
timedelta,
)
import click
from twisted.python.filepath import (
FilePath,
)
from allmydata.crypto import (
ed25519,
)
from allmydata.util.abbreviate import (
abbreviate_time,
)
from allmydata.grid_manager import (
create_grid_manager,
save_grid_manager,
load_grid_manager,
current_datetime_with_zone,
)
from allmydata.util import jsonbytes as json
@click.group()
@click.option(
'--config', '-c',
type=click.Path(),
help="Configuration directory (or - for stdin)",
required=True,
)
@click.pass_context
def grid_manager(ctx, config):
"""
A Tahoe Grid Manager issues certificates to storage-servers
A Tahoe client with one or more Grid Manager public keys
configured will only upload to a Storage Server that presents a
valid certificate signed by one of the configured Grid
Manager keys.
Grid Manager configuration can be in a local directory or given
via stdin. It contains long-term secret information (a private
signing key) and should be kept safe.
"""
class Config(object):
"""
Available to all sub-commands as Click's context.obj
"""
_grid_manager = None
@property
def grid_manager(self):
if self._grid_manager is None:
config_path = _config_path_from_option(config)
try:
self._grid_manager = load_grid_manager(config_path)
except ValueError as e:
raise click.ClickException(
"Error loading Grid Manager from '{}': {}".format(config, e)
)
return self._grid_manager
ctx.obj = Config()
@grid_manager.command()
@click.pass_context
def create(ctx):
"""
Make a new Grid Manager
"""
config_location = ctx.parent.params["config"]
fp = None
if config_location != '-':
fp = FilePath(config_location)
gm = create_grid_manager()
try:
save_grid_manager(fp, gm)
except OSError as e:
raise click.ClickException(
"Can't create '{}': {}".format(config_location, e)
)
@grid_manager.command()
@click.pass_obj
def public_identity(config):
"""
Show the public identity key of a Grid Manager
This is what you give to clients to add to their configuration so
they use announcements from this Grid Manager
"""
click.echo(config.grid_manager.public_identity())
@grid_manager.command()
@click.argument("name")
@click.argument("public_key", type=click.STRING)
@click.pass_context
def add(ctx, name, public_key):
"""
Add a new storage-server by name to a Grid Manager
PUBLIC_KEY is the contents of a node.pubkey file from a Tahoe
node-directory. NAME is an arbitrary label.
"""
public_key = public_key.encode("ascii")
try:
ctx.obj.grid_manager.add_storage_server(
name,
ed25519.verifying_key_from_string(public_key),
)
except KeyError:
raise click.ClickException(
"A storage-server called '{}' already exists".format(name)
)
save_grid_manager(
_config_path_from_option(ctx.parent.params["config"]),
ctx.obj.grid_manager,
create=False,
)
return 0
@grid_manager.command()
@click.argument("name")
@click.pass_context
def remove(ctx, name):
"""
Remove an existing storage-server by name from a Grid Manager
"""
fp = _config_path_from_option(ctx.parent.params["config"])
try:
ctx.obj.grid_manager.remove_storage_server(name)
except KeyError:
raise click.ClickException(
"No storage-server called '{}' exists".format(name)
)
cert_count = 0
if fp is not None:
while fp.child('{}.cert.{}'.format(name, cert_count)).exists():
fp.child('{}.cert.{}'.format(name, cert_count)).remove()
cert_count += 1
save_grid_manager(fp, ctx.obj.grid_manager, create=False)
@grid_manager.command() # noqa: F811
@click.pass_context
def list(ctx):
"""
List all storage-servers known to a Grid Manager
"""
for name in sorted(ctx.obj.grid_manager.storage_servers.keys()):
blank_name = " " * len(name)
click.echo("{}: {}".format(
name,
str(ctx.obj.grid_manager.storage_servers[name].public_key_string(), "utf-8")))
for cert in ctx.obj.grid_manager.storage_servers[name].certificates:
delta = current_datetime_with_zone() - cert.expires
click.echo("{} cert {}: ".format(blank_name, cert.index), nl=False)
if delta.total_seconds() < 0:
click.echo("valid until {} ({})".format(cert.expires, abbreviate_time(delta)))
else:
click.echo("expired {} ({})".format(cert.expires, abbreviate_time(delta)))
@grid_manager.command()
@click.argument("name")
@click.argument(
"expiry_days",
type=click.IntRange(1, 5*365), # XXX is 5 years a good maximum?
)
@click.pass_context
def sign(ctx, name, expiry_days):
"""
Sign a new certificate
"""
fp = _config_path_from_option(ctx.parent.params["config"])
expiry = timedelta(days=expiry_days)
try:
certificate = ctx.obj.grid_manager.sign(name, expiry)
except KeyError:
raise click.ClickException(
"No storage-server called '{}' exists".format(name)
)
certificate_data = json.dumps(certificate.marshal(), indent=4)
click.echo(certificate_data)
if fp is not None:
next_serial = 0
f = None
while f is None:
fname = "{}.cert.{}".format(name, next_serial)
try:
f = fp.child(fname).create()
except FileExistsError:
f = None
except OSError as e:
raise click.ClickException(f"{fname}: {e}")
next_serial += 1
with f:
f.write(certificate_data.encode("ascii"))
def _config_path_from_option(config: str) -> Optional[FilePath]:
"""
:param str config: a path or -
:returns: a FilePath instance or None
"""
if config == "-":
return None
return FilePath(config)
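# Hypothetical in-process usage sketch via Click's test runner (the
# "gm0" directory and the flow are invented; with click >= 7 the
# public_identity command is exposed as "public-identity"):
from click.testing import CliRunner
runner = CliRunner()
assert runner.invoke(grid_manager, ["--config", "gm0", "create"]).exit_code == 0
result = runner.invoke(grid_manager, ["--config", "gm0", "public-identity"])
print(result.output)  # the Grid Manager's public identity key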

View File

@ -3,8 +3,11 @@ Ported to Python 3.
"""
from __future__ import annotations
import os
import stat
import time
import weakref
from typing import Optional
import os, stat, time, weakref
from base64 import urlsafe_b64encode
from functools import partial
# On Python 2 this will be the backported package:
@ -26,12 +29,14 @@ from twisted.application.internet import TimerService
from twisted.python.filepath import FilePath
import allmydata
from allmydata import node
from allmydata.crypto import rsa, ed25519
from allmydata.crypto.util import remove_prefix
from allmydata.storage.server import StorageServer, FoolscapStorageServer
from allmydata import storage_client
from allmydata.immutable.upload import Uploader
from allmydata.immutable.offloaded import Helper
from allmydata.mutable.filenode import MutableFileNode
from allmydata.introducer.client import IntroducerClient
from allmydata.util import (
hashutil, base32, pollmixin, log, idlib,
@ -49,14 +54,13 @@ from allmydata.interfaces import (
IStatsProducer,
SDMF_VERSION,
MDMF_VERSION,
DEFAULT_MAX_SEGMENT_SIZE,
DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE,
IFoolscapStoragePlugin,
IAnnounceableStorageServer,
)
from allmydata.nodemaker import NodeMaker
from allmydata.blacklist import Blacklist
from allmydata import node
from allmydata.node import _Config
KiB=1024
MiB=1024*KiB
@ -72,7 +76,8 @@ def _is_valid_section(section_name):
"""
return (
section_name.startswith("storageserver.plugins.") or
section_name.startswith("storageclient.plugins.")
section_name.startswith("storageclient.plugins.") or
section_name in ("grid_managers", "grid_manager_certificates")
)
@ -87,7 +92,9 @@ _client_config = configutil.ValidConfiguration(
"shares.happy",
"shares.needed",
"shares.total",
"shares._max_immutable_segment_size_for_testing",
"storage.plugins",
"force_foolscap",
),
"storage": (
"debug_discard",
@ -104,6 +111,7 @@ _client_config = configutil.ValidConfiguration(
"reserved_space",
"storage_dir",
"plugins",
"grid_management",
"force_foolscap",
),
"sftpd": (
@ -458,7 +466,7 @@ def create_introducer_clients(config, main_tub, _introducer_factory=None):
return introducer_clients
def create_storage_farm_broker(config, default_connection_handlers, foolscap_connection_handlers, tub_options, introducer_clients):
def create_storage_farm_broker(config: _Config, default_connection_handlers, foolscap_connection_handlers, tub_options, introducer_clients):
"""
Create a StorageFarmBroker object, for use by Uploader/Downloader
(and everybody else who wants to use storage servers)
@ -488,6 +496,7 @@ def create_storage_farm_broker(config, default_connection_handlers, foolscap_con
**kwargs
)
# create the actual storage-broker
sb = storage_client.StorageFarmBroker(
permute_peers=True,
tub_maker=tub_creator,
@ -605,7 +614,7 @@ class _Client(node.Node, pollmixin.PollMixin):
DEFAULT_ENCODING_PARAMETERS = {"k": 3,
"happy": 7,
"n": 10,
"max_segment_size": DEFAULT_MAX_SEGMENT_SIZE,
"max_segment_size": DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE,
}
def __init__(self, config, main_tub, i2p_provider, tor_provider, introducer_clients,
@ -794,16 +803,18 @@ class _Client(node.Node, pollmixin.PollMixin):
sharetypes.append("mutable")
expiration_sharetypes = tuple(sharetypes)
ss = StorageServer(storedir, self.nodeid,
reserved_space=reserved,
discard_storage=discard,
readonly_storage=readonly,
stats_provider=self.stats_provider,
expiration_enabled=expire,
expiration_mode=mode,
expiration_override_lease_duration=o_l_d,
expiration_cutoff_date=cutoff_date,
expiration_sharetypes=expiration_sharetypes)
ss = StorageServer(
storedir, self.nodeid,
reserved_space=reserved,
discard_storage=discard,
readonly_storage=readonly,
stats_provider=self.stats_provider,
expiration_enabled=expire,
expiration_mode=mode,
expiration_override_lease_duration=o_l_d,
expiration_cutoff_date=cutoff_date,
expiration_sharetypes=expiration_sharetypes,
)
ss.setServiceParent(self)
return ss
@ -845,6 +856,14 @@ class _Client(node.Node, pollmixin.PollMixin):
announcement.update(plugins_announcement)
if self.config.get_config("storage", "grid_management", default=False, boolean=True):
grid_manager_certificates = self.config.get_grid_manager_certificates()
announcement[u"grid-manager-certificates"] = grid_manager_certificates
# Note: certificates are not verified for validity here, but
# that may be useful. See:
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3977
for ic in self.introducer_clients:
ic.publish("storage", announcement, self._node_private_key)
@ -895,6 +914,13 @@ class _Client(node.Node, pollmixin.PollMixin):
DEP["k"] = int(self.config.get_config("client", "shares.needed", DEP["k"]))
DEP["n"] = int(self.config.get_config("client", "shares.total", DEP["n"]))
DEP["happy"] = int(self.config.get_config("client", "shares.happy", DEP["happy"]))
# At the moment this is only used for testing, thus the janky config
# attribute name.
DEP["max_segment_size"] = int(self.config.get_config(
"client",
"shares._max_immutable_segment_size_for_testing",
DEP["max_segment_size"])
)
# for the CLI to authenticate to local JSON endpoints
self._create_auth_token()
@ -1086,9 +1112,40 @@ class _Client(node.Node, pollmixin.PollMixin):
def create_immutable_dirnode(self, children, convergence=None):
return self.nodemaker.create_immutable_directory(children, convergence)
def create_mutable_file(self, contents=None, version=None):
def create_mutable_file(
self,
contents: bytes | None = None,
version: int | None = None,
*,
unique_keypair: tuple[rsa.PublicKey, rsa.PrivateKey] | None = None,
) -> MutableFileNode:
"""
Create *and upload* a new mutable object.
:param contents: If given, the initial contents for the new object.
:param version: If given, the mutable file format for the new object
(otherwise a format will be chosen automatically).
:param unique_keypair: **Warning** This value independently determines
the identity of the mutable object to create. There cannot be two
different mutable objects that share a keypair. They will merge
into one object (with undefined contents).
It is common to pass a None value (or not pass a value) for this
parameter. In these cases, a new random keypair will be
generated.
If non-None, the given public/private keypair will be used for the
new object. The expected use-case is for implementing compliance
tests.
:return: A Deferred which will fire with a representation of the new
mutable object after it has been uploaded.
"""
return self.nodemaker.create_mutable_file(contents,
version=version)
version=version,
keypair=unique_keypair)
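# Hypothetical call sketch ("tahoe_client", "pub", and "priv" are
# assumptions; the keypair is as produced by allmydata.crypto.rsa):
d = tahoe_client.create_mutable_file(
    b"initial contents",
    unique_keypair=(pub, priv),
)
d.addCallback(lambda node: print(node.get_uri()))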
def upload(self, uploadable, reactor=None):
uploader = self.getServiceNamed("uploader")

View File

@ -13,20 +13,7 @@ cut-and-pasteability. The base62 encoding is shorter than the base32 form,
but the minor usability improvement is not worth the documentation and
specification confusion of using a non-standard encoding. So we stick with
base32.
Ported to Python 3.
'''
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
import six
from cryptography.exceptions import (
InvalidSignature,
@ -72,7 +59,7 @@ def verifying_key_from_signing_key(private_key):
return private_key.public_key()
def sign_data(private_key, data):
def sign_data(private_key, data: bytes) -> bytes:
"""
Sign the given data using the given private key
@ -86,7 +73,7 @@ def sign_data(private_key, data):
"""
_validate_private_key(private_key)
if not isinstance(data, six.binary_type):
if not isinstance(data, bytes):
raise ValueError('data must be bytes')
return private_key.sign(data)
@ -110,7 +97,7 @@ def string_from_signing_key(private_key):
return PRIVATE_KEY_PREFIX + b2a(raw_key_bytes)
def signing_keypair_from_string(private_key_bytes):
def signing_keypair_from_string(private_key_bytes: bytes):
"""
Load a signing keypair from a string of bytes (which includes the
PRIVATE_KEY_PREFIX)
@ -118,7 +105,7 @@ def signing_keypair_from_string(private_key_bytes):
:returns: a 2-tuple of (private_key, public_key)
"""
if not isinstance(private_key_bytes, six.binary_type):
if not isinstance(private_key_bytes, bytes):
raise ValueError('private_key_bytes must be bytes')
private_key = Ed25519PrivateKey.from_private_bytes(
@ -127,7 +114,7 @@ def signing_keypair_from_string(private_key_bytes):
return private_key, private_key.public_key()
def verify_signature(public_key, alleged_signature, data):
def verify_signature(public_key, alleged_signature: bytes, data: bytes):
"""
:param public_key: a verifying key
@ -139,10 +126,10 @@ def verify_signature(public_key, alleged_signature, data):
:returns: None (or raises an exception).
"""
if not isinstance(alleged_signature, six.binary_type):
if not isinstance(alleged_signature, bytes):
raise ValueError('alleged_signature must be bytes')
if not isinstance(data, six.binary_type):
if not isinstance(data, bytes):
raise ValueError('data must be bytes')
_validate_public_key(public_key)
@ -159,7 +146,7 @@ def verifying_key_from_string(public_key_bytes):
:returns: a public_key
"""
if not isinstance(public_key_bytes, six.binary_type):
if not isinstance(public_key_bytes, bytes):
raise ValueError('public_key_bytes must be bytes')
return Ed25519PublicKey.from_public_bytes(
@ -167,7 +154,7 @@ def verifying_key_from_string(public_key_bytes):
)
def string_from_verifying_key(public_key):
def string_from_verifying_key(public_key) -> bytes:
"""
Encode a public key to a string of bytes
@ -183,7 +170,7 @@ def string_from_verifying_key(public_key):
return PUBLIC_KEY_PREFIX + b2a(raw_key_bytes)
def _validate_public_key(public_key):
def _validate_public_key(public_key: Ed25519PublicKey):
"""
Internal helper. Verify that `public_key` is an appropriate object
"""
@ -192,7 +179,7 @@ def _validate_public_key(public_key):
return None
def _validate_private_key(private_key):
def _validate_private_key(private_key: Ed25519PrivateKey):
"""
Internal helper. Verify that `private_key` is an appropriate object
"""

View File

@ -9,17 +9,14 @@ features of any objects that `cryptography` documents.
That is, the public and private keys are opaque objects; DO NOT depend
on any of their methods.
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from __future__ import annotations
from typing_extensions import TypeAlias
from typing import Callable
from functools import partial
from cryptography.exceptions import InvalidSignature
from cryptography.hazmat.backends import default_backend
@ -30,6 +27,8 @@ from cryptography.hazmat.primitives.serialization import load_der_private_key, l
from allmydata.crypto.error import BadSignature
PublicKey: TypeAlias = rsa.RSAPublicKey
PrivateKey: TypeAlias = rsa.RSAPrivateKey
# This is the value that was used by `pycryptopp`, and we must continue to use it for
# both backwards compatibility and interoperability.
@ -46,12 +45,12 @@ RSA_PADDING = padding.PSS(
def create_signing_keypair(key_size):
def create_signing_keypair(key_size: int) -> tuple[PrivateKey, PublicKey]:
"""
Create a new RSA signing (private) keypair from scratch. Can be used with
`sign_data` function.
:param int key_size: length of key in bits
:param key_size: length of key in bits
:returns: 2-tuple of (private_key, public_key)
"""
@ -63,32 +62,62 @@ def create_signing_keypair(key_size):
return priv_key, priv_key.public_key()
def create_signing_keypair_from_string(private_key_der):
def create_signing_keypair_from_string(private_key_der: bytes) -> tuple[PrivateKey, PublicKey]:
"""
Create an RSA signing (private) key from previously serialized
private key bytes.
:param bytes private_key_der: blob as returned from `der_string_from_signing_keypair`
:param private_key_der: blob as returned from `der_string_from_signing_keypair`
:returns: 2-tuple of (private_key, public_key)
"""
priv_key = load_der_private_key(
_load = partial(
load_der_private_key,
private_key_der,
password=None,
backend=default_backend(),
)
if not isinstance(priv_key, rsa.RSAPrivateKey):
def load_with_validation() -> PrivateKey:
k = _load()
assert isinstance(k, PrivateKey)
return k
def load_without_validation() -> PrivateKey:
k = _load(unsafe_skip_rsa_key_validation=True)
assert isinstance(k, PrivateKey)
return k
# Load it once without the potentially expensive OpenSSL validation
# checks. These have superlinear complexity. We *will* run them just
# below - but first we'll apply our own constant-time checks.
load: Callable[[], PrivateKey] = load_without_validation
try:
unsafe_priv_key = load()
except TypeError:
# cryptography<39 does not support this parameter, so just load the
# key with validation...
unsafe_priv_key = load_with_validation()
# But avoid *reloading* it since that will run the expensive
# validation *again*.
load = lambda: unsafe_priv_key
if not isinstance(unsafe_priv_key, rsa.RSAPrivateKey):
raise ValueError(
"Private Key did not decode to an RSA key"
)
if priv_key.key_size != 2048:
if unsafe_priv_key.key_size != 2048:
raise ValueError(
"Private Key must be 2048 bits"
)
return priv_key, priv_key.public_key()
# Now re-load it with OpenSSL's validation applied.
safe_priv_key = load()
return safe_priv_key, safe_priv_key.public_key()
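# Round-trip sketch for this module (2048-bit keys are required above;
# the message is invented):
priv, pub = create_signing_keypair(2048)
priv2, _ = create_signing_keypair_from_string(der_string_from_signing_key(priv))
sig = sign_data(priv2, b"data")
verify_signature(pub, sig, b"data")  # raises BadSignature on mismatch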
def der_string_from_signing_key(private_key):
def der_string_from_signing_key(private_key: PrivateKey) -> bytes:
"""
Serializes a given RSA private key to a DER string
@ -98,14 +127,14 @@ def der_string_from_signing_key(private_key):
:returns: bytes representing `private_key`
"""
_validate_private_key(private_key)
return private_key.private_bytes(
return private_key.private_bytes( # type: ignore[attr-defined]
encoding=Encoding.DER,
format=PrivateFormat.PKCS8,
encryption_algorithm=NoEncryption(),
)
def der_string_from_verifying_key(public_key):
def der_string_from_verifying_key(public_key: PublicKey) -> bytes:
"""
Serializes a given RSA public key to a DER string.
@ -121,7 +150,7 @@ def der_string_from_verifying_key(public_key):
)
def create_verifying_key_from_string(public_key_der):
def create_verifying_key_from_string(public_key_der: bytes) -> PublicKey:
"""
Create an RSA verifying key from a previously serialized public key
@ -134,15 +163,16 @@ def create_verifying_key_from_string(public_key_der):
public_key_der,
backend=default_backend(),
)
assert isinstance(pub_key, PublicKey)
return pub_key
def sign_data(private_key, data):
def sign_data(private_key: PrivateKey, data: bytes) -> bytes:
"""
:param private_key: the private part of a keypair returned from
`create_signing_keypair_from_string` or `create_signing_keypair`
:param bytes data: the bytes to sign
:param data: the bytes to sign
:returns: bytes which are a signature of the bytes given as `data`.
"""
@ -153,7 +183,7 @@ def sign_data(private_key, data):
hashes.SHA256(),
)
def verify_signature(public_key, alleged_signature, data):
def verify_signature(public_key: PublicKey, alleged_signature: bytes, data: bytes) -> None:
"""
:param public_key: a verifying key, returned from `create_verifying_key_from_string` or `create_verifying_key_from_private_key`
@ -173,23 +203,23 @@ def verify_signature(public_key, alleged_signature, data):
raise BadSignature()
def _validate_public_key(public_key):
def _validate_public_key(public_key: PublicKey) -> None:
"""
Internal helper. Checks that `public_key` is a valid cryptography
object
"""
if not isinstance(public_key, rsa.RSAPublicKey):
raise ValueError(
"public_key must be an RSAPublicKey"
f"public_key must be an RSAPublicKey not {type(public_key)}"
)
def _validate_private_key(private_key):
def _validate_private_key(private_key: PrivateKey) -> None:
"""
Internal helper. Checks that `private_key` is a valid cryptography
object
"""
if not isinstance(private_key, rsa.RSAPrivateKey):
raise ValueError(
"private_key must be an RSAPrivateKey"
f"private_key must be an RSAPrivateKey not {type(private_key)}"
)

View File

@ -0,0 +1,495 @@
"""
Functions and classes relating to the Grid Manager internal state
"""
import sys
from datetime import (
datetime,
timezone,
)
from typing import (
Optional,
Union,
List,
)
from twisted.python.filepath import FilePath
from allmydata.crypto import (
ed25519,
)
from allmydata.util import (
base32,
jsonbytes as json,
dictutil,
)
from attrs import (
frozen,
Factory,
)
@frozen
class SignedCertificate(object):
"""
A signed certificate.
"""
# A JSON-encoded, UTF-8-encoded certificate.
certificate : bytes
# The signature (although the signature is in base32 in "public",
# this contains the decoded raw bytes -- not base32)
signature : bytes
@classmethod
def load(cls, file_like):
data = json.load(file_like)
return cls(
certificate=data["certificate"].encode("utf-8"),
signature=base32.a2b(data["signature"].encode("ascii")),
)
def marshal(self):
"""
:return dict: a json-able dict
"""
return dict(
certificate=self.certificate,
signature=base32.b2a(self.signature),
)
@frozen
class _GridManagerStorageServer(object):
"""
A Grid Manager's notion of a storage server
"""
name : str
public_key : ed25519.Ed25519PublicKey
certificates : list = Factory(list) # SignedCertificates
def add_certificate(self, certificate):
"""
Add ``certificate``
"""
self.certificates.append(certificate)
def public_key_string(self) -> bytes:
"""
:returns: the public key as bytes.
"""
return ed25519.string_from_verifying_key(self.public_key)
def marshal(self):
"""
:returns: a dict suitable for JSON representing this object
"""
return {
u"public_key": self.public_key_string(),
}
@frozen
class _GridManagerCertificate(object):
"""
Represents a single certificate for a single storage-server
"""
filename : str
index : int
expires : datetime
public_key : ed25519.Ed25519PublicKey
def create_grid_manager():
"""
Create a new Grid Manager with a fresh keypair
"""
private_key, public_key = ed25519.create_signing_keypair()
return _GridManager(
ed25519.string_from_signing_key(private_key),
{},
)
def current_datetime_with_zone():
"""
:returns: a timezone-aware datetime object representing the
current timestamp in UTC
"""
return datetime.now(timezone.utc)
def _load_certificates_for(config_path: FilePath, name: str, gm_key: Optional[ed25519.Ed25519PublicKey] = None) -> List[_GridManagerCertificate]:
"""
Load any existing certificates for the given storage-server.
:param FilePath config_path: the configuration location (or None for
stdin)
:param str name: the name of an existing storage-server
:param ed25519.Ed25519PublicKey gm_key: an optional Grid Manager
public key. If provided, certificates will be verified against it.
:returns: list containing any known certificates (may be empty)
:raises: ed25519.BadSignature if any certificate signature fails to verify
"""
cert_index = 0
cert_path = config_path.child('{}.cert.{}'.format(name, cert_index))
certificates = []
while cert_path.exists():
container = SignedCertificate.load(cert_path.open('r'))
if gm_key is not None:
validate_grid_manager_certificate(gm_key, container)
cert_data = json.loads(container.certificate)
if cert_data['version'] != 1:
raise ValueError(
"Unknown certificate version '{}' in '{}'".format(
cert_data['version'],
cert_path.path,
)
)
certificates.append(
_GridManagerCertificate(
filename=cert_path.path,
index=cert_index,
expires=datetime.fromisoformat(cert_data['expires']),
public_key=ed25519.verifying_key_from_string(cert_data['public_key'].encode('ascii')),
)
)
cert_index += 1
cert_path = config_path.child('{}.cert.{}'.format(name, cert_index))
return certificates
def load_grid_manager(config_path: Optional[FilePath]):
"""
Load a Grid Manager from existing configuration.
:param FilePath config_path: the configuration location (or None for
stdin)
:returns: a GridManager instance
:raises: ValueError if the configuration is invalid or IOError if
expected files can't be opened.
"""
if config_path is None:
config_file = sys.stdin
else:
# this might raise IOError or similar but caller must handle it
config_file = config_path.child("config.json").open("r")
with config_file:
config = json.load(config_file)
gm_version = config.get(u'grid_manager_config_version', None)
if gm_version != 0:
raise ValueError(
"Missing or unknown version '{}' of Grid Manager config".format(
gm_version
)
)
if 'private_key' not in config:
raise ValueError(
"'private_key' required in config"
)
private_key_bytes = config['private_key'].encode('ascii')
try:
private_key, public_key = ed25519.signing_keypair_from_string(private_key_bytes)
except Exception as e:
raise ValueError(
"Invalid Grid Manager private_key: {}".format(e)
)
storage_servers = dict()
for name, srv_config in list(config.get(u'storage_servers', {}).items()):
if 'public_key' not in srv_config:
raise ValueError(
"No 'public_key' for storage server '{}'".format(name)
)
storage_servers[name] = _GridManagerStorageServer(
name,
ed25519.verifying_key_from_string(srv_config['public_key'].encode('ascii')),
[] if config_path is None else _load_certificates_for(config_path, name, public_key),
)
return _GridManager(private_key_bytes, storage_servers)
class _GridManager(object):
"""
A Grid Manager's configuration.
"""
def __init__(self, private_key_bytes, storage_servers):
self._storage_servers = dictutil.UnicodeKeyDict(
{} if storage_servers is None else storage_servers
)
assert isinstance(private_key_bytes, bytes)
self._private_key_bytes = private_key_bytes
self._private_key, self._public_key = ed25519.signing_keypair_from_string(self._private_key_bytes)
self._version = 0
@property
def storage_servers(self):
return self._storage_servers
def public_identity(self):
"""
:returns: public key as a string
"""
return ed25519.string_from_verifying_key(self._public_key)
def sign(self, name, expiry):
"""
Create a new signed certificate for a particular server
:param str name: the server to create a certificate for
:param timedelta expiry: how far in the future the certificate
should expire.
:returns SignedCertificate: the signed certificate.
"""
assert isinstance(name, str) # must be unicode
try:
srv = self._storage_servers[name]
except KeyError:
raise KeyError(
"No storage server named '{}'".format(name)
)
expiration = current_datetime_with_zone() + expiry
cert_info = {
"expires": expiration.isoformat(),
"public_key": srv.public_key_string(),
"version": 1,
}
cert_data = json.dumps_bytes(cert_info, separators=(',',':'), sort_keys=True)
sig = ed25519.sign_data(self._private_key, cert_data)
certificate = SignedCertificate(
certificate=cert_data,
signature=sig,
)
vk = ed25519.verifying_key_from_signing_key(self._private_key)
ed25519.verify_signature(vk, sig, cert_data)
srv.add_certificate(certificate)
return certificate
def add_storage_server(self, name, public_key):
"""
:param name: a user-meaningful name for the server
:param public_key: ed25519.VerifyingKey the public-key of the
storage provider (e.g. from the contents of node.pubkey
for the client)
"""
assert isinstance(name, str) # must be unicode
if name in self._storage_servers:
raise KeyError(
"Already have a storage server called '{}'".format(name)
)
ss = _GridManagerStorageServer(name, public_key, [])
self._storage_servers[name] = ss
return ss
def remove_storage_server(self, name):
"""
:param name: a user-meaningful name for the server
"""
assert isinstance(name, str) # must be unicode
try:
del self._storage_servers[name]
except KeyError:
raise KeyError(
"No storage server called '{}'".format(name)
)
def marshal(self):
"""
:returns: a dict suitable for JSON representing this object
"""
data = {
u"grid_manager_config_version": self._version,
u"private_key": self._private_key_bytes.decode('ascii'),
}
if self._storage_servers:
data[u"storage_servers"] = {
name: srv.marshal()
for name, srv
in self._storage_servers.items()
}
return data
def save_grid_manager(file_path, grid_manager, create=True):
"""
Writes a Grid Manager configuration.
:param file_path: a FilePath specifying where to write the config
(if None, stdout is used)
:param grid_manager: a _GridManager instance
:param bool create: if True (the default) we are creating a new
grid-manager and will fail if the directory already exists.
"""
data = json.dumps(
grid_manager.marshal(),
indent=4,
)
if file_path is None:
print("{}\n".format(data))
else:
try:
file_path.makedirs()
file_path.chmod(0o700)
except OSError:
if create:
raise
with file_path.child("config.json").open("w") as f:
f.write(data.encode("utf-8"))
f.write(b"\n")
def parse_grid_manager_certificate(gm_data: Union[str, bytes]):
"""
:param gm_data: some data that might be JSON that might be a valid
Grid Manager Certificate
:returns: the JSON data of a valid Grid Manager certificate.
:raises: ValueError if the data is not valid.
"""
required_keys = {
'certificate',
'signature',
}
js = json.loads(gm_data)
if not isinstance(js, dict):
raise ValueError(
"Grid Manager certificate must be a dict"
)
if set(js.keys()) != required_keys:
raise ValueError(
"Grid Manager certificate must contain: {}".format(
", ".join("'{}'".format(k) for k in required_keys),
)
)
return js
def validate_grid_manager_certificate(gm_key, alleged_cert):
"""
:param gm_key: a VerifyingKey instance, a Grid Manager's public
key.
:param SignedCertificate alleged_cert: A signed certificate.
:return: a dict consisting of the deserialized certificate data or
None if the signature is invalid. Note we do NOT check the
expiry time in this function.
"""
try:
ed25519.verify_signature(
gm_key,
alleged_cert.signature,
alleged_cert.certificate,
)
except ed25519.BadSignature:
return None
# signature is valid; now we can load the actual data
cert = json.loads(alleged_cert.certificate)
return cert
def create_grid_manager_verifier(keys, certs, public_key, now_fn=None, bad_cert=None):
"""
Creates a predicate for confirming some Grid Manager-issued
certificates against Grid Manager keys. A predicate is used
(instead of just returning True/False here) so that the
expiry-time can be tested on each call.
:param list keys: 0 or more ``VerifyingKey`` instances
:param list certs: 1 or more Grid Manager certificates each of
which is a ``SignedCertificate``.
:param bytes public_key: the identifier of the server we expect
certificates for.
:param callable now_fn: a callable which returns the current UTC
timestamp (or current_datetime_with_zone() if None).
:param callable bad_cert: a two-argument callable which is invoked
when a certificate verification fails. The first argument is
the verifying key and the second is the certificate. If None
(the default) errors are print()-ed. Note that we may have
several certificates and only one must be valid, so this may
be called (multiple times) even if the function ultimately
returns successfully.
:returns: a callable which will return True only if there is at
least one valid certificate (that has not at this moment
expired) in `certs` signed by one of the keys in `keys`.
"""
now_fn = current_datetime_with_zone if now_fn is None else now_fn
valid_certs = []
# if we have zero grid-manager keys then everything is valid
if not keys:
return lambda: True
if bad_cert is None:
def bad_cert(key, alleged_cert):
"""
We might want to let the user know about this failed-to-verify
certificate ... but with multiple grid-managers configured, a
bunch of these messages could appear, so it would be better to
bubble this up to some sort of status API (or maybe the Welcome
page?).
The only thing that might actually be interesting, though, is
whether this whole function ultimately returns False or not.
"""
print(
"Grid Manager certificate signature failed. Certificate: "
"\"{cert}\" for key \"{key}\".".format(
cert=alleged_cert,
key=ed25519.string_from_verifying_key(key),
)
)
# validate the signatures on any certificates we have (not yet the expiry dates)
for alleged_cert in certs:
for key in keys:
cert = validate_grid_manager_certificate(key, alleged_cert)
if cert is not None:
valid_certs.append(cert)
else:
bad_cert(key, alleged_cert)
def validate():
"""
:returns: True if *any* certificate is still valid for a server
"""
now = now_fn()
for cert in valid_certs:
expires = datetime.fromisoformat(cert["expires"])
if cert['public_key'].encode("ascii") == public_key:
if expires > now:
# not-expired
return True
return False
return validate
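# A usage sketch for the factory above (hedged: variable names are
# illustrative, not the actual call sites):
#
#     verifier = create_grid_manager_verifier(
#         gm_keys,       # VerifyingKey instances from our config
#         server_certs,  # SignedCertificate instances the server announced
#         server_pubkey, # the server's public-key identifier, as bytes
#     )
#     if verifier():
#         ...  # uploading to this server is permitted right now
#
# The predicate re-checks expiry on every call, so a long-lived caller
# sees certificates expire without rebuilding the verifier.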

View File

@ -19,7 +19,7 @@ from foolscap.api import eventually
from allmydata import uri
from allmydata.codec import CRSDecoder
from allmydata.util import base32, log, hashutil, mathutil, observer
from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE
from allmydata.interfaces import DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
from allmydata.hashtree import IncompleteHashTree, BadHashError, \
NotEnoughHashesError
@ -49,6 +49,8 @@ class DownloadNode(object):
"""Internal class which manages downloads and holds state. External
callers use CiphertextFileNode instead."""
default_max_segment_size = DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
# Share._node points to me
def __init__(self, verifycap, storage_broker, secret_holder,
terminator, history, download_status):
@ -76,7 +78,7 @@ class DownloadNode(object):
# .guessed_segment_size, .guessed_num_segments, and
# .ciphertext_hash_tree (with a dummy, to let us guess which hashes
# we'll need)
self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)
self._build_guessed_tables(self.default_max_segment_size)
# filled in when we parse a valid UEB
self.have_UEB = False

View File

@ -262,6 +262,8 @@ class Encoder(object):
d.addCallback(lambda res: self.finish_hashing())
# These calls have to happen in order; layout.py now requires writes to
# be appended to the data written so far.
d.addCallback(lambda res:
self.send_crypttext_hash_tree_to_all_shareholders())
d.addCallback(lambda res: self.send_all_block_hash_trees())

View File

@ -1,21 +1,18 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from __future__ import annotations
import struct
from io import BytesIO
from attrs import define, field
from zope.interface import implementer
from twisted.internet import defer
from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \
FileTooLargeError, HASH_SIZE
from allmydata.util import mathutil, observer, pipeline, log
from allmydata.util import mathutil, observer, log
from allmydata.util.assertutil import precondition
from allmydata.storage.server import si_b2a
@ -107,19 +104,58 @@ def make_write_bucket_proxy(rref, server,
num_share_hashes, uri_extension_size)
return wbp
@define
class _WriteBuffer:
"""
Queue up small writes to be written in a single batched larger write.
"""
_batch_size: int
_to_write : BytesIO = field(factory=BytesIO)
_written_bytes : int = field(default=0)
def queue_write(self, data: bytes) -> bool:
"""
Queue a write. If the result is ``False``, no further action is needed
for now. If the result is ``True``, it's time to call ``flush()`` and
do a real write.
"""
self._to_write.write(data)
return self.get_queued_bytes() >= self._batch_size
def flush(self) -> tuple[int, bytes]:
"""Return offset and data to be written."""
offset = self._written_bytes
data = self._to_write.getvalue()
self._written_bytes += len(data)
self._to_write = BytesIO()
return (offset, data)
def get_queued_bytes(self) -> int:
"""Return number of queued, unwritten bytes."""
return self._to_write.tell()
def get_total_bytes(self) -> int:
"""Return how many bytes were written or queued in total."""
return self._written_bytes + self.get_queued_bytes()
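# A behavioral sketch of _WriteBuffer (hypothetical batch size):
#
#     wb = _WriteBuffer(batch_size=10)
#     assert wb.queue_write(b"abc") is False      # 3 bytes queued, below batch
#     assert wb.queue_write(b"defghij") is True   # 10 >= 10: time to flush
#     assert wb.flush() == (0, b"abcdefghij")     # offset 0, all queued data
#     assert wb.get_total_bytes() == 10           # written plus queued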
@implementer(IStorageBucketWriter)
class WriteBucketProxy(object):
"""
Note: The various ``put_`` methods need to be called in the order in which the
bytes will get written.
"""
fieldsize = 4
fieldstruct = ">L"
def __init__(self, rref, server, data_size, block_size, num_segments,
num_share_hashes, uri_extension_size, pipeline_size=50000):
num_share_hashes, uri_extension_size, batch_size=1_000_000):
self._rref = rref
self._server = server
self._data_size = data_size
self._block_size = block_size
self._num_segments = num_segments
self._written_bytes = 0
effective_segments = mathutil.next_power_of_k(num_segments,2)
self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE
@ -130,11 +166,13 @@ class WriteBucketProxy(object):
self._create_offsets(block_size, data_size)
# k=3, max_segment_size=128KiB gives us a typical segment of 43691
# bytes. Setting the default pipeline_size to 50KB lets us get two
# segments onto the wire but not a third, which would keep the pipe
# filled.
self._pipeline = pipeline.Pipeline(pipeline_size)
# With a ~1MB batch size, max upload speed is 1MB/(round-trip latency)
# assuming the writing code waits for writes to finish, so 20MB/sec if
# latency is 50ms. In the US many people only have 1MB/sec upload speed
# as of 2022 (standard Comcast). For further discussion of how one
# might set batch sizes see
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3787#comment:1.
self._write_buffer = _WriteBuffer(batch_size)
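# Back-of-the-envelope check of the figures above (a sketch, not
# project code):
#
#     batch_size = 1_000_000   # bytes per batched remote write
#     rtt = 0.050              # 50ms round-trip latency
#     print(batch_size / rtt)  # 20,000,000 bytes/sec, i.e. ~20MB/sec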
def get_allocated_size(self):
return (self._offsets['uri_extension'] + self.fieldsize +
@ -179,7 +217,7 @@ class WriteBucketProxy(object):
return "<WriteBucketProxy for node %r>" % self._server.get_name()
def put_header(self):
return self._write(0, self._offset_data)
return self._queue_write(0, self._offset_data)
def put_block(self, segmentnum, data):
offset = self._offsets['data'] + segmentnum * self._block_size
@ -193,13 +231,13 @@ class WriteBucketProxy(object):
(self._block_size *
(self._num_segments - 1))),
len(data), self._block_size)
return self._write(offset, data)
return self._queue_write(offset, data)
def put_crypttext_hashes(self, hashes):
# plaintext_hash_tree precedes crypttext_hash_tree. It is not used, and
# so is not explicitly written, but we need to write everything, so
# fill it in with nulls.
d = self._write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size)
d = self._queue_write(self._offsets['plaintext_hash_tree'], b"\x00" * self._segment_hash_size)
d.addCallback(lambda _: self._really_put_crypttext_hashes(hashes))
return d
@ -212,7 +250,7 @@ class WriteBucketProxy(object):
precondition(offset + len(data) <= self._offsets['block_hashes'],
offset, len(data), offset+len(data),
self._offsets['block_hashes'])
return self._write(offset, data)
return self._queue_write(offset, data)
def put_block_hashes(self, blockhashes):
offset = self._offsets['block_hashes']
@ -223,7 +261,7 @@ class WriteBucketProxy(object):
precondition(offset + len(data) <= self._offsets['share_hashes'],
offset, len(data), offset+len(data),
self._offsets['share_hashes'])
return self._write(offset, data)
return self._queue_write(offset, data)
def put_share_hashes(self, sharehashes):
# sharehashes is a list of (index, hash) tuples, so they get stored
@ -237,29 +275,45 @@ class WriteBucketProxy(object):
precondition(offset + len(data) <= self._offsets['uri_extension'],
offset, len(data), offset+len(data),
self._offsets['uri_extension'])
return self._write(offset, data)
return self._queue_write(offset, data)
def put_uri_extension(self, data):
offset = self._offsets['uri_extension']
assert isinstance(data, bytes)
precondition(len(data) == self._uri_extension_size)
length = struct.pack(self.fieldstruct, len(data))
return self._write(offset, length+data)
return self._queue_write(offset, length+data)
def _write(self, offset, data):
# use a Pipeline to pipeline several writes together. TODO: another
# speedup would be to coalesce small writes into a single call: this
# would reduce the foolscap CPU overhead per share, but wouldn't
# reduce the number of round trips, so it might not be worth the
# effort.
self._written_bytes += len(data)
return self._pipeline.add(len(data),
self._rref.callRemote, "write", offset, data)
def _queue_write(self, offset, data):
"""
This queues up small writes to be written in a single batched larger
write.
Callers of this function are expected to queue the data in order, with
no holes. As such, the offset is technically unnecessary, but is used
to check the inputs. Possibly we should get rid of it.
"""
assert offset == self._write_buffer.get_total_bytes()
if self._write_buffer.queue_write(data):
return self._actually_write()
else:
return defer.succeed(False)
def _actually_write(self):
"""Write data to the server."""
offset, data = self._write_buffer.flush()
return self._rref.callRemote("write", offset, data)
def close(self):
assert self._written_bytes == self.get_allocated_size(), f"{self._written_bytes} != {self.get_allocated_size()}"
d = self._pipeline.add(0, self._rref.callRemote, "close")
d.addCallback(lambda ign: self._pipeline.flush())
assert self._write_buffer.get_total_bytes() == self.get_allocated_size(), (
f"{self._written_buffer.get_total_bytes_queued()} != {self.get_allocated_size()}"
)
if self._write_buffer.get_queued_bytes() > 0:
d = self._actually_write()
else:
# No data queued, don't send empty string write.
d = defer.succeed(True)
d.addCallback(lambda _: self._rref.callRemote("close"))
return d
def abort(self):
@ -371,16 +425,16 @@ class ReadBucketProxy(object):
self._fieldsize = fieldsize
self._fieldstruct = fieldstruct
for field in ( 'data',
'plaintext_hash_tree', # UNUSED
'crypttext_hash_tree',
'block_hashes',
'share_hashes',
'uri_extension',
):
for field_name in ( 'data',
'plaintext_hash_tree', # UNUSED
'crypttext_hash_tree',
'block_hashes',
'share_hashes',
'uri_extension',
):
offset = struct.unpack(fieldstruct, data[x:x+fieldsize])[0]
x += fieldsize
self._offsets[field] = offset
self._offsets[field_name] = offset
return self._offsets
def _get_block_data(self, unused, blocknum, blocksize, thisblocksize):

View File

@ -48,7 +48,7 @@ from allmydata.util.rrefutil import add_version_to_remote_reference
from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \
IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus, \
NoServersError, InsufficientVersionError, UploadUnhappinessError, \
DEFAULT_MAX_SEGMENT_SIZE, IPeerSelector
DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE, IPeerSelector
from allmydata.immutable import layout
from io import BytesIO
@ -543,7 +543,7 @@ class Tahoe2ServerSelector(log.PrefixingLogMixin):
# 0. Start with an ordered list of servers. Maybe *2N* of them.
#
all_servers = storage_broker.get_servers_for_psi(storage_index)
all_servers = storage_broker.get_servers_for_psi(storage_index, for_upload=True)
if not all_servers:
raise NoServersError("client gave us zero servers")
@ -1692,7 +1692,7 @@ class AssistedUploader(object):
class BaseUploadable(object):
# this is overridden by max_segment_size
default_max_segment_size = DEFAULT_MAX_SEGMENT_SIZE
default_max_segment_size = DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
default_params_set = False
max_segment_size = None

View File

@ -41,7 +41,8 @@ URI = StringConstraint(300) # kind of arbitrary
MAX_BUCKETS = 256 # per peer -- zfec offers at most 256 shares per file
DEFAULT_MAX_SEGMENT_SIZE = 128*1024
# The default size for segments of new CHK ("immutable") uploads.
DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE = 1024*1024
ShareData = StringConstraint(None)
URIExtensionData = StringConstraint(1000)
@ -560,6 +561,12 @@ class IServer(IDisplayableServer):
once the connection is lost.
"""
def upload_permitted():
"""
:return: True if we should use this server for uploads, False
otherwise.
"""
def get_storage_server():
"""
Once a server is connected, I return an ``IStorageServer``.
@ -570,8 +577,6 @@ class IServer(IDisplayableServer):
"""
class IMutableSlotWriter(Interface):
"""
The interface for a writer around a mutable slot on a remote server.

View File

@ -1,14 +1,7 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from __future__ import annotations
MODE_CHECK = "MODE_CHECK" # query all peers
MODE_ANYTHING = "MODE_ANYTHING" # one recoverable version
@ -17,6 +10,9 @@ MODE_WRITE = "MODE_WRITE" # replace all shares, probably.. not for initial
MODE_READ = "MODE_READ"
MODE_REPAIR = "MODE_REPAIR" # query all peers, get the privkey
from allmydata.crypto import aes, rsa
from allmydata.util import hashutil
class NotWriteableError(Exception):
pass
@ -68,3 +64,33 @@ class CorruptShareError(BadShareError):
class UnknownVersionError(BadShareError):
"""The share we received was of a version we don't recognize."""
def encrypt_privkey(writekey: bytes, privkey: bytes) -> bytes:
"""
For SSK, encrypt a private ("signature") key using the writekey.
"""
encryptor = aes.create_encryptor(writekey)
crypttext = aes.encrypt_data(encryptor, privkey)
return crypttext
def decrypt_privkey(writekey: bytes, enc_privkey: bytes) -> bytes:
"""
The inverse of ``encrypt_privkey``.
"""
decryptor = aes.create_decryptor(writekey)
privkey = aes.decrypt_data(decryptor, enc_privkey)
return privkey
def derive_mutable_keys(keypair: tuple[rsa.PublicKey, rsa.PrivateKey]) -> tuple[bytes, bytes, bytes]:
"""
Derive the SSK writekey, encrypted writekey, and fingerprint from the
public/private ("verification" / "signature") keypair.
"""
pubkey, privkey = keypair
pubkey_s = rsa.der_string_from_verifying_key(pubkey)
privkey_s = rsa.der_string_from_signing_key(privkey)
writekey = hashutil.ssk_writekey_hash(privkey_s)
encprivkey = encrypt_privkey(writekey, privkey_s)
fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey_s)
return writekey, encprivkey, fingerprint
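# A round-trip sketch of the helpers above (hedged: assumes
# rsa.create_signing_keypair() from allmydata.crypto.rsa returns a
# (private, public) pair):
#
#     priv, pub = rsa.create_signing_keypair(2048)
#     writekey, encprivkey, fingerprint = derive_mutable_keys((pub, priv))
#     # decrypt_privkey() inverts encrypt_privkey() under the same writekey:
#     assert decrypt_privkey(writekey, encprivkey) == \
#         rsa.der_string_from_signing_key(priv)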

View File

@ -1,14 +1,7 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from __future__ import annotations
import random
@ -16,8 +9,6 @@ from zope.interface import implementer
from twisted.internet import defer, reactor
from foolscap.api import eventually
from allmydata.crypto import aes
from allmydata.crypto import rsa
from allmydata.interfaces import IMutableFileNode, ICheckable, ICheckResults, \
NotEnoughSharesError, MDMF_VERSION, SDMF_VERSION, IMutableUploadable, \
IMutableFileVersion, IWriteable
@ -28,8 +19,14 @@ from allmydata.uri import WriteableSSKFileURI, ReadonlySSKFileURI, \
from allmydata.monitor import Monitor
from allmydata.mutable.publish import Publish, MutableData,\
TransformingUploadable
from allmydata.mutable.common import MODE_READ, MODE_WRITE, MODE_CHECK, UnrecoverableFileError, \
UncoordinatedWriteError
from allmydata.mutable.common import (
MODE_READ,
MODE_WRITE,
MODE_CHECK,
UnrecoverableFileError,
UncoordinatedWriteError,
derive_mutable_keys,
)
from allmydata.mutable.servermap import ServerMap, ServermapUpdater
from allmydata.mutable.retrieve import Retrieve
from allmydata.mutable.checker import MutableChecker, MutableCheckAndRepairer
@ -139,13 +136,10 @@ class MutableFileNode(object):
Deferred that fires (with the MutableFileNode instance you should
use) when it completes.
"""
(pubkey, privkey) = keypair
self._pubkey, self._privkey = pubkey, privkey
pubkey_s = rsa.der_string_from_verifying_key(self._pubkey)
privkey_s = rsa.der_string_from_signing_key(self._privkey)
self._writekey = hashutil.ssk_writekey_hash(privkey_s)
self._encprivkey = self._encrypt_privkey(self._writekey, privkey_s)
self._fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey_s)
self._pubkey, self._privkey = keypair
self._writekey, self._encprivkey, self._fingerprint = derive_mutable_keys(
keypair,
)
if version == MDMF_VERSION:
self._uri = WriteableMDMFFileURI(self._writekey, self._fingerprint)
self._protocol_version = version
@ -171,16 +165,6 @@ class MutableFileNode(object):
(contents, type(contents))
return contents(self)
def _encrypt_privkey(self, writekey, privkey):
encryptor = aes.create_encryptor(writekey)
crypttext = aes.encrypt_data(encryptor, privkey)
return crypttext
def _decrypt_privkey(self, enc_privkey):
decryptor = aes.create_decryptor(self._writekey)
privkey = aes.decrypt_data(decryptor, enc_privkey)
return privkey
def _populate_pubkey(self, pubkey):
self._pubkey = pubkey
def _populate_required_shares(self, required_shares):

View File

@ -35,8 +35,13 @@ from allmydata.mutable.layout import get_version_from_checkstring,\
MDMFSlotWriteProxy, \
SDMFSlotWriteProxy
from eliot import (
Message,
start_action,
)
KiB = 1024
DEFAULT_MAX_SEGMENT_SIZE = 128 * KiB
DEFAULT_MUTABLE_MAX_SEGMENT_SIZE = 128 * KiB
PUSHING_BLOCKS_STATE = 0
PUSHING_EVERYTHING_ELSE_STATE = 1
DONE_STATE = 2
@ -367,7 +372,7 @@ class Publish(object):
self.data = newdata
self.datalength = newdata.get_size()
#if self.datalength >= DEFAULT_MAX_SEGMENT_SIZE:
#if self.datalength >= DEFAULT_MUTABLE_MAX_SEGMENT_SIZE:
# self._version = MDMF_VERSION
#else:
# self._version = SDMF_VERSION
@ -551,7 +556,7 @@ class Publish(object):
def setup_encoding_parameters(self, offset=0):
if self._version == MDMF_VERSION:
segment_size = DEFAULT_MAX_SEGMENT_SIZE # 128 KiB by default
segment_size = DEFAULT_MUTABLE_MAX_SEGMENT_SIZE # 128 KiB by default
else:
segment_size = self.datalength # SDMF is only one segment
# this must be a multiple of self.required_shares
@ -955,12 +960,31 @@ class Publish(object):
old_assignments.add(server, shnum)
serverlist = []
for i, server in enumerate(self.full_serverlist):
serverid = server.get_serverid()
if server in self.bad_servers:
continue
entry = (len(old_assignments.get(server, [])), i, serverid, server)
serverlist.append(entry)
action = start_action(
action_type=u"mutable:upload:update_goal",
homeless_shares=len(homeless_shares),
)
with action:
for i, server in enumerate(self.full_serverlist):
serverid = server.get_serverid()
if server in self.bad_servers:
Message.log(
message_type=u"mutable:upload:bad-server",
server_id=serverid,
)
continue
# if we have >= 1 grid-managers, this checks that we have
# a valid certificate for this server
if not server.upload_permitted():
Message.log(
message_type=u"mutable:upload:no-gm-certs",
server_id=serverid,
)
continue
entry = (len(old_assignments.get(server, [])), i, serverid, server)
serverlist.append(entry)
serverlist.sort()
if not serverlist:

View File

@ -1,15 +1,7 @@
"""
Ported to Python 3.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
# Don't import bytes and str, to prevent API leakage
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, max, min # noqa: F401
from __future__ import annotations
import time
@ -32,7 +24,7 @@ from allmydata import hashtree, codec
from allmydata.storage.server import si_b2a
from allmydata.mutable.common import CorruptShareError, BadShareError, \
UncoordinatedWriteError
UncoordinatedWriteError, decrypt_privkey
from allmydata.mutable.layout import MDMFSlotReadProxy
@implementer(IRetrieveStatus)
@ -931,9 +923,10 @@ class Retrieve(object):
def _try_to_validate_privkey(self, enc_privkey, reader, server):
alleged_privkey_s = self._node._decrypt_privkey(enc_privkey)
node_writekey = self._node.get_writekey()
alleged_privkey_s = decrypt_privkey(node_writekey, enc_privkey)
alleged_writekey = hashutil.ssk_writekey_hash(alleged_privkey_s)
if alleged_writekey != self._node.get_writekey():
if alleged_writekey != node_writekey:
self.log("invalid privkey from %s shnum %d" %
(reader, reader.shnum),
level=log.WEIRD, umid="YIw4tA")

View File

@ -1,16 +1,8 @@
"""
Ported to Python 3.
"""
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from __future__ import annotations
from future.utils import PY2
if PY2:
# Doesn't import str to prevent API leakage on Python 2
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401
from past.builtins import unicode
from six import ensure_str
import sys, time, copy
@ -29,7 +21,7 @@ from allmydata.storage.server import si_b2a
from allmydata.interfaces import IServermapUpdaterStatus
from allmydata.mutable.common import MODE_CHECK, MODE_ANYTHING, MODE_WRITE, \
MODE_READ, MODE_REPAIR, CorruptShareError
MODE_READ, MODE_REPAIR, CorruptShareError, decrypt_privkey
from allmydata.mutable.layout import SIGNED_PREFIX_LENGTH, MDMFSlotReadProxy
@implementer(IServermapUpdaterStatus)
@ -203,8 +195,8 @@ class ServerMap(object):
(seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
offsets_tuple) = verinfo
print("[%s]: sh#%d seq%d-%s %d-of-%d len%d" %
(unicode(server.get_name(), "utf-8"), shnum,
seqnum, unicode(base32.b2a(root_hash)[:4], "utf-8"), k, N,
(str(server.get_name(), "utf-8"), shnum,
seqnum, str(base32.b2a(root_hash)[:4], "utf-8"), k, N,
datalength), file=out)
if self._problems:
print("%d PROBLEMS" % len(self._problems), file=out)
@ -276,7 +268,7 @@ class ServerMap(object):
"""Take a versionid, return a string that describes it."""
(seqnum, root_hash, IV, segsize, datalength, k, N, prefix,
offsets_tuple) = verinfo
return "seq%d-%s" % (seqnum, unicode(base32.b2a(root_hash)[:4], "utf-8"))
return "seq%d-%s" % (seqnum, str(base32.b2a(root_hash)[:4], "utf-8"))
def summarize_versions(self):
"""Return a string describing which versions we know about."""
@ -824,7 +816,7 @@ class ServermapUpdater(object):
def notify_server_corruption(self, server, shnum, reason):
if isinstance(reason, unicode):
if isinstance(reason, str):
reason = reason.encode("utf-8")
ss = server.get_storage_server()
ss.advise_corrupt_share(
@ -879,7 +871,7 @@ class ServermapUpdater(object):
# ok, it's a valid verinfo. Add it to the list of validated
# versions.
self.log(" found valid version %d-%s from %s-sh%d: %d-%d/%d/%d"
% (seqnum, unicode(base32.b2a(root_hash)[:4], "utf-8"),
% (seqnum, str(base32.b2a(root_hash)[:4], "utf-8"),
ensure_str(server.get_name()), shnum,
k, n, segsize, datalen),
parent=lp)
@ -951,9 +943,10 @@ class ServermapUpdater(object):
writekey stored in my node. If it is valid, then I set the
privkey and encprivkey properties of the node.
"""
alleged_privkey_s = self._node._decrypt_privkey(enc_privkey)
node_writekey = self._node.get_writekey()
alleged_privkey_s = decrypt_privkey(node_writekey, enc_privkey)
alleged_writekey = hashutil.ssk_writekey_hash(alleged_privkey_s)
if alleged_writekey != self._node.get_writekey():
if alleged_writekey != node_writekey:
self.log("invalid privkey from %r shnum %d" %
(server.get_name(), shnum),
parent=lp, level=log.WEIRD, umid="aJVccw")

View File

@ -14,6 +14,7 @@ if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from six import ensure_str, ensure_text
import json
import datetime
import os.path
import re
@ -350,6 +351,19 @@ class _Config(object):
"Unable to write config file '{}'".format(fn),
)
def enumerate_section(self, section):
"""
Returns a dict containing all items in a configuration section. An
empty dict is returned if the section doesn't exist.
"""
answer = dict()
try:
for k in self.config.options(section):
answer[k] = self.config.get(section, k)
except configparser.NoSectionError:
pass
return answer
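# A sketch of the behaviour (hypothetical section contents). Given a
# tahoe.cfg containing:
#
#     [grid_managers]
#     gm0 = pub-v0-<base32...>
#
# enumerate_section("grid_managers") returns {"gm0": "pub-v0-<base32...>"}
# and enumerate_section("no-such-section") returns {}.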
def items(self, section, default=_None):
try:
return self.config.items(section)
@ -484,6 +498,12 @@ class _Config(object):
"""
returns an absolute path inside the 'private' directory with any
extra args join()-ed
This exists for historical reasons. New code should ideally
not call this because it makes it harder for e.g. a SQL-based
_Config object to exist. Code that needs to call this method
should probably be a _Config method itself. See
e.g. get_grid_manager_certificates()
"""
return os.path.join(self._basedir, "private", *args)
@ -491,6 +511,12 @@ class _Config(object):
"""
returns an absolute path inside the config directory with any
extra args join()-ed
This exists for historical reasons. New code should ideally
not call this because it makes it harder for e.g. a SQL-based
_Config object to exist. Code that needs to call this method
should probably be a _Config method itself. See
e.g. get_grid_manager_certificates()
"""
# note: we re-expand here (_basedir already went through this
# expanduser function) in case the path we're being asked for
@ -499,6 +525,35 @@ class _Config(object):
os.path.join(self._basedir, *args)
)
def get_grid_manager_certificates(self):
"""
Load all Grid Manager certificates in the config.
:returns: A list of all certificates. An empty list is
returned if there are none.
"""
grid_manager_certificates = []
cert_fnames = list(self.enumerate_section("grid_manager_certificates").values())
for fname in cert_fnames:
fname = self.get_config_path(fname)
if not os.path.exists(fname):
raise ValueError(
"Grid Manager certificate file '{}' doesn't exist".format(
fname
)
)
with open(fname, 'r') as f:
cert = json.load(f)
if set(cert.keys()) != {"certificate", "signature"}:
raise ValueError(
"Unknown key in Grid Manager certificate '{}'".format(
fname
)
)
grid_manager_certificates.append(cert)
return grid_manager_certificates
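# A sketch of the config this reads (hedged: names are illustrative).
# Each value is a JSON file, relative to the config dir, that must
# contain exactly the keys "certificate" and "signature":
#
#     [grid_manager_certificates]
#     default = default.cert
#     example = example-grid.cert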
def get_introducer_configuration(self):
"""
Get configuration for introducers.

View File

@ -1,17 +1,12 @@
"""
Ported to Python 3.
Create file nodes of various types.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from __future__ import annotations
import weakref
from zope.interface import implementer
from twisted.internet.defer import succeed
from allmydata.util.assertutil import precondition
from allmydata.interfaces import INodeMaker
from allmydata.immutable.literal import LiteralFileNode
@ -22,6 +17,7 @@ from allmydata.mutable.publish import MutableData
from allmydata.dirnode import DirectoryNode, pack_children
from allmydata.unknown import UnknownNode
from allmydata.blacklist import ProhibitedNode
from allmydata.crypto.rsa import PublicKey, PrivateKey
from allmydata import uri
@ -126,12 +122,15 @@ class NodeMaker(object):
return self._create_dirnode(filenode)
return None
def create_mutable_file(self, contents=None, version=None):
def create_mutable_file(self, contents=None, version=None, keypair: tuple[PublicKey, PrivateKey] | None = None):
if version is None:
version = self.mutable_file_default
n = MutableFileNode(self.storage_broker, self.secret_holder,
self.default_encoding_parameters, self.history)
d = self.key_generator.generate()
if keypair is None:
d = self.key_generator.generate()
else:
d = succeed(keypair)
d.addCallback(n.create_with_keys, contents, version=version)
d.addCallback(lambda res: n)
return d

View File

@ -89,7 +89,7 @@ class _FoolscapOrHttps(Protocol, metaclass=_PretendToBeNegotiation):
certificate=cls.tub.myCertificate.original,
)
http_storage_server = HTTPServer(storage_server, swissnum)
http_storage_server = HTTPServer(reactor, storage_server, swissnum)
cls.https_factory = TLSMemoryBIOFactory(
certificate_options,
False,

View File

@ -12,11 +12,6 @@ if PY2:
from six import ensure_binary
try:
from allmydata.scripts.types_ import SubCommands
except ImportError:
pass
from twisted.python import usage
from twisted.python.filepath import (
FilePath,
@ -29,6 +24,14 @@ from allmydata.storage import (
crawler,
expirer,
)
from allmydata.scripts.types_ import SubCommands
from allmydata.client import read_config
from allmydata.grid_manager import (
parse_grid_manager_certificate,
)
from allmydata.scripts.cli import _default_nodedir
from allmydata.util.encodingutil import argv_to_abspath
from allmydata.util import jsonbytes
class GenerateKeypairOptions(BaseOptions):
@ -75,6 +78,7 @@ def derive_pubkey(options):
print("public:", str(ed25519.string_from_verifying_key(public_key), "ascii"), file=out)
return 0
class MigrateCrawlerOptions(BasedirOptions):
def getSynopsis(self):
@ -94,6 +98,61 @@ class MigrateCrawlerOptions(BasedirOptions):
return t
class AddGridManagerCertOptions(BaseOptions):
"""
Options for add-grid-manager-cert
"""
optParameters = [
['filename', 'f', None, "Filename of the certificate ('-', a dash, for stdin)"],
['name', 'n', None, "Name to give this certificate"],
]
def getSynopsis(self):
return "Usage: tahoe [global-options] admin add-grid-manager-cert [options]"
def postOptions(self) -> None:
if self['name'] is None:
raise usage.UsageError(
"Must provide --name option"
)
if self['filename'] is None:
raise usage.UsageError(
"Must provide --filename option"
)
data: str
if self['filename'] == '-':
print("reading certificate from stdin", file=self.parent.parent.stderr)
data = self.parent.parent.stdin.read()
if len(data) == 0:
raise usage.UsageError(
"Reading certificate from stdin failed"
)
else:
with open(self['filename'], 'r') as f:
data = f.read()
try:
self.certificate_data = parse_grid_manager_certificate(data)
except ValueError as e:
raise usage.UsageError(
"Error parsing certificate: {}".format(e)
)
def getUsage(self, width=None):
t = BaseOptions.getUsage(self, width)
t += (
"Adds a Grid Manager certificate to a Storage Server.\n\n"
"The certificate will be copied into the base-dir and config\n"
"will be added to 'tahoe.cfg', which will be re-written. A\n"
"restart is required for changes to take effect.\n\n"
"The human who operates a Grid Manager would produce such a\n"
"certificate and communicate it securely to you.\n"
)
return t
def migrate_crawler(options):
out = options.stdout
storage = FilePath(options['basedir']).child("storage")
@ -116,6 +175,44 @@ def migrate_crawler(options):
print("Not found: '{}'".format(fp.path), file=out)
def add_grid_manager_cert(options):
"""
Add a new Grid Manager certificate to our config
"""
# XXX is there really not already a function for this?
if options.parent.parent['node-directory']:
nd = argv_to_abspath(options.parent.parent['node-directory'])
else:
nd = _default_nodedir
config = read_config(nd, "portnum")
cert_fname = "{}.cert".format(options['name'])
cert_path = FilePath(config.get_config_path(cert_fname))
cert_bytes = jsonbytes.dumps_bytes(options.certificate_data, indent=4) + b'\n'
cert_name = options['name']
if cert_path.exists():
msg = "Already have certificate for '{}' (at {})".format(
options['name'],
cert_path.path,
)
print(msg, file=options.stderr)
return 1
config.set_config("storage", "grid_management", "True")
config.set_config("grid_manager_certificates", cert_name, cert_fname)
# write all the data out
with cert_path.open("wb") as f:
f.write(cert_bytes)
cert_count = len(config.enumerate_section("grid_manager_certificates"))
print("There are now {} certificates".format(cert_count),
file=options.stderr)
return 0
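# A usage sketch (hypothetical names and paths). Given a certificate
# file gm-cert.json produced by a Grid Manager operator:
#
#     tahoe --node-directory=/var/tahoe/storage0 admin \
#         add-grid-manager-cert --name default --filename gm-cert.json
#
# afterwards tahoe.cfg contains:
#
#     [storage]
#     grid_management = True
#
#     [grid_manager_certificates]
#     default = default.cert
#
# and default.cert sits next to tahoe.cfg; a restart is required for
# the change to take effect.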
class AdminCommand(BaseOptions):
subCommands = [
("generate-keypair", None, GenerateKeypairOptions,
@ -124,6 +221,9 @@ class AdminCommand(BaseOptions):
"Derive a public key from a private key."),
("migrate-crawler", None, MigrateCrawlerOptions,
"Write the crawler-history data as JSON."),
("add-grid-manager-cert", None, AddGridManagerCertOptions,
"Add a Grid Manager-provided certificate to a storage "
"server's config."),
]
def postOptions(self):
if not hasattr(self, 'subOptions'):
@ -138,11 +238,14 @@ each subcommand.
"""
return t
subDispatch = {
"generate-keypair": print_keypair,
"derive-pubkey": derive_pubkey,
"migrate-crawler": migrate_crawler,
}
"add-grid-manager-cert": add_grid_manager_cert,
}
def do_admin(options):
so = options.subOptions
@ -158,4 +261,4 @@ subCommands = [
dispatch = {
"admin": do_admin,
}
}

View File

@ -180,10 +180,22 @@ class GetOptions(FileStoreOptions):
class PutOptions(FileStoreOptions):
optFlags = [
("mutable", "m", "Create a mutable file instead of an immutable one (like --format=SDMF)"),
]
]
optParameters = [
("format", None, None, "Create a file with the given format: SDMF and MDMF for mutable, CHK (default) for immutable. (case-insensitive)"),
]
("private-key-path", None, None,
"***Warning*** "
"It is possible to use this option to spoil the normal security properties of mutable objects. "
"It is also possible to corrupt or destroy data with this option. "
"Most users will not need this option and can ignore it. "
"For mutables only, "
"this gives a file containing a PEM-encoded 2048 bit RSA private key to use as the signature key for the mutable. "
"The private key must be handled at least as strictly as the resulting capability string. "
"A single private key must not be used for more than one mutable."
),
]
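# A CLI sketch of the option above (hypothetical file names):
#
#     openssl genrsa -out my-mutable.pem 2048
#     tahoe put --mutable --private-key-path=my-mutable.pem ./document.txt
#
# The key becomes the mutable's signature key, so it must be guarded
# at least as carefully as the resulting write-capability.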
def parseArgs(self, arg1=None, arg2=None):
# see Examples below

View File

@ -165,6 +165,8 @@ def parse_or_exit(config, argv, stdout, stderr):
:return: ``config``, after using it to parse the argument list.
"""
try:
config.stdout = stdout
config.stderr = stderr
parse_options(argv[1:], config=config)
except usage.error as e:
# `parse_options` may have the side-effect of initializing a
@ -199,6 +201,7 @@ def dispatch(config,
so.stdout = stdout
so.stderr = stderr
so.stdin = stdin
config.stdin = stdin
if command in create_dispatch:
f = create_dispatch[command]

View File

@ -1,23 +1,32 @@
"""
Ported to Python 3.
Implement the ``tahoe put`` command.
"""
from __future__ import unicode_literals
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from __future__ import annotations
from io import BytesIO
from urllib.parse import quote as url_quote
from base64 import urlsafe_b64encode
from cryptography.hazmat.primitives.serialization import load_pem_private_key
from twisted.python.filepath import FilePath
from allmydata.crypto.rsa import PrivateKey, der_string_from_signing_key
from allmydata.scripts.common_http import do_http, format_http_success, format_http_error
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.util.encodingutil import quote_output
def load_private_key(path: str) -> str:
"""
Load a private key from a file and return it in a format appropriate
to include in the HTTP request.
"""
privkey = load_pem_private_key(FilePath(path).getContent(), password=None)
assert isinstance(privkey, PrivateKey)
derbytes = der_string_from_signing_key(privkey)
return urlsafe_b64encode(derbytes).decode("ascii")
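# A sketch of producing a key that load_private_key() accepts (hedged:
# standard `cryptography` APIs, not project code; the key must be
# unencrypted since we pass password=None above):
#
#     from cryptography.hazmat.primitives.asymmetric import rsa as c_rsa
#     from cryptography.hazmat.primitives import serialization
#
#     key = c_rsa.generate_private_key(public_exponent=65537, key_size=2048)
#     pem = key.private_bytes(
#         encoding=serialization.Encoding.PEM,
#         format=serialization.PrivateFormat.TraditionalOpenSSL,
#         encryption_algorithm=serialization.NoEncryption(),
#     )
#     FilePath("my-mutable.pem").setContent(pem)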
def put(options):
"""
@param verbosity: 0, 1, or 2, meaning quiet, verbose, or very verbose
@ -29,6 +38,10 @@ def put(options):
from_file = options.from_file
to_file = options.to_file
mutable = options['mutable']
if options["private-key-path"] is None:
private_key = None
else:
private_key = load_private_key(options["private-key-path"])
format = options['format']
if options['quiet']:
verbosity = 0
@ -79,6 +92,12 @@ def put(options):
queryargs = []
if mutable:
queryargs.append("mutable=true")
if private_key is not None:
queryargs.append(f"private-key={private_key}")
else:
if private_key is not None:
raise Exception("Can only supply a private key for mutables.")
if format:
queryargs.append("format=%s" % format)
if queryargs:
@ -92,10 +111,7 @@ def put(options):
if verbosity > 0:
print("waiting for file data on stdin..", file=stderr)
# We're uploading arbitrary files, so this had better be bytes:
if PY2:
stdinb = stdin
else:
stdinb = stdin.buffer
stdinb = stdin.buffer
data = stdinb.read()
infileobj = BytesIO(data)

View File

@ -21,7 +21,11 @@ from twisted.scripts import twistd
from twisted.python import usage
from twisted.python.filepath import FilePath
from twisted.python.reflect import namedAny
from twisted.internet.defer import maybeDeferred
from twisted.python.failure import Failure
from twisted.internet.defer import maybeDeferred, Deferred
from twisted.internet.protocol import Protocol
from twisted.internet.stdio import StandardIO
from twisted.internet.error import ReactorNotRunning
from twisted.application.service import Service
from allmydata.scripts.default_nodedir import _default_nodedir
@ -155,6 +159,8 @@ class DaemonizeTheRealService(Service, HookMixin):
def startService(self):
from twisted.internet import reactor
def start():
node_to_instance = {
u"client": lambda: maybeDeferred(namedAny("allmydata.client.create_client"), self.basedir),
@ -194,12 +200,14 @@ class DaemonizeTheRealService(Service, HookMixin):
def created(srv):
srv.setServiceParent(self.parent)
# exiting on stdin-closed facilitates cleanup when run
# as a subprocess
on_stdin_close(reactor, reactor.stop)
d.addCallback(created)
d.addErrback(handle_config_error)
d.addBoth(self._call_hook, 'running')
return d
from twisted.internet import reactor
reactor.callWhenRunning(start)
@ -213,6 +221,46 @@ class DaemonizeTahoeNodePlugin(object):
return DaemonizeTheRealService(self.nodetype, self.basedir, so)
def on_stdin_close(reactor, fn):
"""
Arrange for the function `fn` to run when our stdin closes
"""
when_closed_d = Deferred()
class WhenClosed(Protocol):
"""
Notify a Deferred when our connection is lost. As this is passed
to twisted's StandardIO class, it is used to detect our parent
going away.
"""
def connectionLost(self, reason):
when_closed_d.callback(None)
def on_close(arg):
try:
fn()
except ReactorNotRunning:
pass
except Exception:
# for our "exit" use-case failures will _mostly_ just be
# ReactorNotRunning (because we're already shutting down
# when our stdin closes) but no matter what "bad thing"
# happens we just want to ignore it .. although other
# errors might be interesting so we'll log those
print(Failure())
return arg
when_closed_d.addBoth(on_close)
# We don't need to do anything with this instance because it gets
# hooked into the reactor and thus remembered, but we return it
# for Windows testing purposes.
return StandardIO(
proto=WhenClosed(),
reactor=reactor,
)
def run(reactor, config, runApp=twistd.runApp):
"""
Runs a Tahoe-LAFS node in the foreground.

View File

@ -2,8 +2,6 @@
Type definitions used by modules in this package.
"""
# Python 3 only
from typing import List, Tuple, Type, Sequence, Any
from twisted.python.usage import Options

View File

@ -20,7 +20,11 @@ from twisted.web.http_headers import Headers
from twisted.web import http
from twisted.web.iweb import IPolicyForHTTPS
from twisted.internet.defer import inlineCallbacks, returnValue, fail, Deferred, succeed
from twisted.internet.interfaces import IOpenSSLClientConnectionCreator, IReactorTime
from twisted.internet.interfaces import (
IOpenSSLClientConnectionCreator,
IReactorTime,
IDelayedCall,
)
from twisted.internet.ssl import CertificateOptions
from twisted.web.client import Agent, HTTPConnectionPool
from zope.interface import implementer
@ -124,16 +128,22 @@ class _LengthLimitedCollector:
"""
remaining_length: int
timeout_on_silence: IDelayedCall
f: BytesIO = field(factory=BytesIO)
def __call__(self, data: bytes):
self.timeout_on_silence.reset(60)
self.remaining_length -= len(data)
if self.remaining_length < 0:
raise ValueError("Response length was too long")
self.f.write(data)
def limited_content(response, max_length: int = 30 * 1024 * 1024) -> Deferred[BinaryIO]:
def limited_content(
response,
clock: IReactorTime,
max_length: int = 30 * 1024 * 1024,
) -> Deferred[BinaryIO]:
"""
Like ``treq.content()``, but limit data read from the response to a set
length. If the response is longer than the max allowed length, the result
@ -142,39 +152,29 @@ def limited_content(response, max_length: int = 30 * 1024 * 1024) -> Deferred[Bi
A potentially useful future improvement would be using a temporary file to
store the content; since filesystem buffering means that would use memory
for small responses and disk for large responses.
This will time out if no data is received for 60 seconds; so long as a
trickle of data continues to arrive, it will continue to run.
"""
collector = _LengthLimitedCollector(max_length)
d = succeed(None)
timeout = clock.callLater(60, d.cancel)
collector = _LengthLimitedCollector(max_length, timeout)
# Make really sure everything gets called in Deferred context, treq might
# call collector directly...
d = succeed(None)
d.addCallback(lambda _: treq.collect(response, collector))
def done(_):
timeout.cancel()
collector.f.seek(0)
return collector.f
d.addCallback(done)
return d
def failed(f):
if timeout.active():
timeout.cancel()
return f
def _decode_cbor(response, schema: Schema):
"""Given HTTP response, return decoded CBOR body."""
def got_content(f: BinaryIO):
data = f.read()
schema.validate_cbor(data)
return loads(data)
if response.code > 199 and response.code < 300:
content_type = get_content_type(response.headers)
if content_type == CBOR_MIME_TYPE:
return limited_content(response).addCallback(got_content)
else:
raise ClientException(-1, "Server didn't send CBOR")
else:
return treq.content(response).addCallback(
lambda data: fail(ClientException(response.code, response.phrase, data))
)
return d.addCallbacks(done, failed)
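# A usage sketch (hedged: illustrative only). Passing the reactor as
# ``clock`` drives the 60-second inactivity timeout in production; a
# fake clock can stand in for it in tests:
#
#     from twisted.internet import reactor
#     d = treq.get(some_url)
#     d.addCallback(lambda resp: limited_content(resp, reactor, 2 ** 20))
#     d.addCallback(lambda f: f.read())  # at most 1MiB of body bytes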
@define
@ -323,6 +323,7 @@ class StorageClient(object):
swissnum = nurl.path[0].encode("ascii")
certificate_hash = nurl.user.encode("ascii")
pool = HTTPConnectionPool(reactor)
pool.maxPersistentPerHost = 20
if cls.TEST_MODE_REGISTER_HTTP_POOL is not None:
cls.TEST_MODE_REGISTER_HTTP_POOL(pool)
@ -362,6 +363,7 @@ class StorageClient(object):
write_enabler_secret=None,
headers=None,
message_to_serialize=None,
timeout: float = 60,
**kwargs,
):
"""
@ -370,6 +372,8 @@ class StorageClient(object):
If ``message_to_serialize`` is set, it will be serialized (by default
with CBOR) and set as the request body.
Default timeout is 60 seconds.
"""
headers = self._get_headers(headers)
@ -401,7 +405,28 @@ class StorageClient(object):
kwargs["data"] = dumps(message_to_serialize)
headers.addRawHeader("Content-Type", CBOR_MIME_TYPE)
return self._treq.request(method, url, headers=headers, **kwargs)
return self._treq.request(
method, url, headers=headers, timeout=timeout, **kwargs
)
def decode_cbor(self, response, schema: Schema):
"""Given HTTP response, return decoded CBOR body."""
def got_content(f: BinaryIO):
data = f.read()
schema.validate_cbor(data)
return loads(data)
if response.code > 199 and response.code < 300:
content_type = get_content_type(response.headers)
if content_type == CBOR_MIME_TYPE:
return limited_content(response, self._clock).addCallback(got_content)
else:
raise ClientException(-1, "Server didn't send CBOR")
else:
return treq.content(response).addCallback(
lambda data: fail(ClientException(response.code, response.phrase, data))
)
@define(hash=True)
@ -419,7 +444,9 @@ class StorageClientGeneral(object):
"""
url = self._client.relative_url("/storage/v1/version")
response = yield self._client.request("GET", url)
decoded_response = yield _decode_cbor(response, _SCHEMAS["get_version"])
decoded_response = yield self._client.decode_cbor(
response, _SCHEMAS["get_version"]
)
returnValue(decoded_response)
@inlineCallbacks
@ -486,6 +513,9 @@ def read_share_chunk(
share_type, _encode_si(storage_index), share_number
)
)
# The default 60 second timeout is for getting the response, so it doesn't
# include the time it takes to download the body... so we will deal
# with that later, via limited_content().
response = yield client.request(
"GET",
url,
@ -494,6 +524,7 @@ def read_share_chunk(
# but Range constructor does that the conversion for us.
{"range": [Range("bytes", [(offset, offset + length)]).to_header()]}
),
unbuffered=True, # Don't buffer the response in memory.
)
if response.code == http.NO_CONTENT:
@ -516,7 +547,7 @@ def read_share_chunk(
raise ValueError("Server sent more than we asked for?!")
# It might also send less than we asked for. That's (probably) OK, e.g.
# if we went past the end of the file.
body = yield limited_content(response, supposed_length)
body = yield limited_content(response, client._clock, supposed_length)
body.seek(0, SEEK_END)
actual_length = body.tell()
if actual_length != supposed_length:
@ -603,7 +634,9 @@ class StorageClientImmutables(object):
upload_secret=upload_secret,
message_to_serialize=message,
)
decoded_response = yield _decode_cbor(response, _SCHEMAS["allocate_buckets"])
decoded_response = yield self._client.decode_cbor(
response, _SCHEMAS["allocate_buckets"]
)
returnValue(
ImmutableCreateResult(
already_have=decoded_response["already-have"],
@ -679,7 +712,9 @@ class StorageClientImmutables(object):
raise ClientException(
response.code,
)
body = yield _decode_cbor(response, _SCHEMAS["immutable_write_share_chunk"])
body = yield self._client.decode_cbor(
response, _SCHEMAS["immutable_write_share_chunk"]
)
remaining = RangeMap()
for chunk in body["required"]:
remaining.set(True, chunk["begin"], chunk["end"])
@ -708,7 +743,7 @@ class StorageClientImmutables(object):
url,
)
if response.code == http.OK:
body = yield _decode_cbor(response, _SCHEMAS["list_shares"])
body = yield self._client.decode_cbor(response, _SCHEMAS["list_shares"])
returnValue(set(body))
else:
raise ClientException(response.code)
@ -825,7 +860,9 @@ class StorageClientMutables:
message_to_serialize=message,
)
if response.code == http.OK:
result = await _decode_cbor(response, _SCHEMAS["mutable_read_test_write"])
result = await self._client.decode_cbor(
response, _SCHEMAS["mutable_read_test_write"]
)
return ReadTestWriteResult(success=result["success"], reads=result["data"])
else:
raise ClientException(response.code, (await response.content()))
@ -854,7 +891,9 @@ class StorageClientMutables:
)
response = await self._client.request("GET", url)
if response.code == http.OK:
return await _decode_cbor(response, _SCHEMAS["mutable_list_shares"])
return await self._client.decode_cbor(
response, _SCHEMAS["mutable_list_shares"]
)
else:
raise ClientException(response.code)

View File

@ -9,6 +9,8 @@ from functools import wraps
from base64 import b64decode
import binascii
from tempfile import TemporaryFile
from os import SEEK_END, SEEK_SET
import mmap
from cryptography.x509 import Certificate as CryptoCertificate
from zope.interface import implementer
@ -22,6 +24,7 @@ from twisted.internet.interfaces import (
from twisted.internet.address import IPv4Address, IPv6Address
from twisted.internet.defer import Deferred
from twisted.internet.ssl import CertificateOptions, Certificate, PrivateCertificate
from twisted.internet.interfaces import IReactorFromThreads
from twisted.web.server import Site, Request
from twisted.protocols.tls import TLSMemoryBIOFactory
from twisted.python.filepath import FilePath
@ -39,7 +42,7 @@ from cryptography.x509 import load_pem_x509_certificate
# TODO Make sure to use pure Python versions?
from cbor2 import dump, loads
import cbor2
from pycddl import Schema, ValidationError as CDDLValidationError
from .server import StorageServer
from .http_common import (
@ -54,6 +57,8 @@ from .common import si_a2b
from .immutable import BucketWriter, ConflictingWriteError
from ..util.hashutil import timing_safe_compare
from ..util.base32 import rfc3548_alphabet
from ..util.deferredutil import async_to_deferred
from ..util.cputhreadpool import defer_to_thread
from allmydata.interfaces import BadWriteEnablerError
@ -100,7 +105,7 @@ def _authorization_decorator(required_secrets):
@wraps(f)
def route(self, request, *args, **kwargs):
if not timing_safe_compare(
request.requestHeaders.getRawHeaders("Authorization", [None])[0].encode(
request.requestHeaders.getRawHeaders("Authorization", [""])[0].encode(
"utf-8"
),
swissnum_auth_header(self._swissnum),
@ -278,7 +283,7 @@ _SCHEMAS = {
"test-write-vectors": {
0*256 share_number : {
"test": [0*30 {"offset": uint, "size": uint, "specimen": bstr}]
"write": [0*30 {"offset": uint, "data": bstr}]
"write": [* {"offset": uint, "data": bstr}]
"new-length": uint / null
}
}
@ -484,8 +489,12 @@ class HTTPServer(object):
return str(failure.value).encode("utf-8")
def __init__(
self, storage_server, swissnum
): # type: (StorageServer, bytes) -> None
self,
reactor: IReactorFromThreads,
storage_server: StorageServer,
swissnum: bytes,
):
self._reactor = reactor
self._storage_server = storage_server
self._swissnum = swissnum
# Maps storage index to StorageIndexUploads:
@ -515,7 +524,7 @@ class HTTPServer(object):
if accept.best == CBOR_MIME_TYPE:
request.setHeader("Content-Type", CBOR_MIME_TYPE)
f = TemporaryFile()
dump(data, f)
cbor2.dump(data, f)
def read_data(offset: int, length: int) -> bytes:
f.seek(offset)
@ -527,27 +536,57 @@ class HTTPServer(object):
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3861
raise _HTTPError(http.NOT_ACCEPTABLE)
def _read_encoded(self, request, schema: Schema) -> Any:
async def _read_encoded(
self, request, schema: Schema, max_size: int = 1024 * 1024
) -> Any:
"""
Read encoded request body data, decoding it with CBOR by default.
Somewhat arbitrarily, limit body size to 1MB; this may be too low, we
may want to customize per query type, but this is the starting point
for now.
Somewhat arbitrarily, limit body size to 1MiB by default.
"""
content_type = get_content_type(request.requestHeaders)
if content_type == CBOR_MIME_TYPE:
# Read 1 byte more than 1MB. We expect length to be 1MB or
# less; if it's more assume it's not a legitimate message.
message = request.content.read(1024 * 1024 + 1)
if len(message) > 1024 * 1024:
raise _HTTPError(http.REQUEST_ENTITY_TOO_LARGE)
schema.validate_cbor(message)
result = loads(message)
return result
else:
if content_type != CBOR_MIME_TYPE:
raise _HTTPError(http.UNSUPPORTED_MEDIA_TYPE)
# Make sure it's not too large:
request.content.seek(0, SEEK_END)
size = request.content.tell()
if size > max_size:
raise _HTTPError(http.REQUEST_ENTITY_TOO_LARGE)
request.content.seek(0, SEEK_SET)
# We don't want to load the whole message into memory, because it might
# be quite large. The CDDL validator takes a read-only bytes-like
# thing. Luckily, for large request bodies twisted.web will buffer the
# data in a file, so we can use mmap() to get a memory view. The CDDL
# validator will not make a copy, so it won't increase memory usage
# beyond that.
try:
fd = request.content.fileno()
except (ValueError, OSError):
fd = -1
if fd >= 0:
# It's a file, so we can use mmap() to save memory.
message = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
else:
message = request.content.read()
# Pycddl will release the GIL when validating larger documents, so
# let's take advantage of multiple CPUs:
if size > 10_000:
await defer_to_thread(self._reactor, schema.validate_cbor, message)
else:
schema.validate_cbor(message)
# The CBOR parser will allocate more memory, but at least we can feed
# it the file-like object, so that if it's large it won't make two
# copies.
request.content.seek(0, SEEK_SET)
# Typically deserialization to Python will not release the GIL, and
# indeed as of Jan 2023 cbor2 didn't have any code to release the GIL
# in the decode path. As such, running it in a different thread has no benefit.
return cbor2.load(request.content)
##### Generic APIs #####
@_authorized_route(_app, set(), "/storage/v1/version", methods=["GET"])
@ -563,10 +602,14 @@ class HTTPServer(object):
"/storage/v1/immutable/<storage_index:storage_index>",
methods=["POST"],
)
def allocate_buckets(self, request, authorization, storage_index):
@async_to_deferred
async def allocate_buckets(self, request, authorization, storage_index):
"""Allocate buckets."""
upload_secret = authorization[Secrets.UPLOAD]
info = self._read_encoded(request, _SCHEMAS["allocate_buckets"])
# It's just a list of up to ~256 shares, shouldn't use many bytes.
info = await self._read_encoded(
request, _SCHEMAS["allocate_buckets"], max_size=8192
)
# We do NOT validate the upload secret for existing bucket uploads.
# Another upload may be happening in parallel, with a different upload
@ -588,7 +631,7 @@ class HTTPServer(object):
storage_index, share_number, upload_secret, bucket
)
return self._send_encoded(
return await self._send_encoded(
request,
{"already-have": set(already_got), "allocated": set(sharenum_to_bucket)},
)
@ -723,7 +766,8 @@ class HTTPServer(object):
"/storage/v1/immutable/<storage_index:storage_index>/<int(signed=False):share_number>/corrupt",
methods=["POST"],
)
def advise_corrupt_share_immutable(
@async_to_deferred
async def advise_corrupt_share_immutable(
self, request, authorization, storage_index, share_number
):
"""Indicate that given share is corrupt, with a text reason."""
@ -732,7 +776,11 @@ class HTTPServer(object):
except KeyError:
raise _HTTPError(http.NOT_FOUND)
info = self._read_encoded(request, _SCHEMAS["advise_corrupt_share"])
# The reason can be a string with explanation, so in theory it could be
# longish?
info = await self._read_encoded(
request, _SCHEMAS["advise_corrupt_share"], max_size=32768,
)
bucket.advise_corrupt_share(info["reason"].encode("utf-8"))
return b""
@ -744,9 +792,12 @@ class HTTPServer(object):
"/storage/v1/mutable/<storage_index:storage_index>/read-test-write",
methods=["POST"],
)
def mutable_read_test_write(self, request, authorization, storage_index):
@async_to_deferred
async def mutable_read_test_write(self, request, authorization, storage_index):
"""Read/test/write combined operation for mutables."""
rtw_request = self._read_encoded(request, _SCHEMAS["mutable_read_test_write"])
rtw_request = await self._read_encoded(
request, _SCHEMAS["mutable_read_test_write"], max_size=2**48
)
secrets = (
authorization[Secrets.WRITE_ENABLER],
authorization[Secrets.LEASE_RENEW],
@ -771,7 +822,9 @@ class HTTPServer(object):
)
except BadWriteEnablerError:
raise _HTTPError(http.UNAUTHORIZED)
return self._send_encoded(request, {"success": success, "data": read_data})
return await self._send_encoded(
request, {"success": success, "data": read_data}
)
@_authorized_route(
_app,
@ -816,7 +869,8 @@ class HTTPServer(object):
"/storage/v1/mutable/<storage_index:storage_index>/<int(signed=False):share_number>/corrupt",
methods=["POST"],
)
def advise_corrupt_share_mutable(
@async_to_deferred
async def advise_corrupt_share_mutable(
self, request, authorization, storage_index, share_number
):
"""Indicate that given share is corrupt, with a text reason."""
@ -825,7 +879,11 @@ class HTTPServer(object):
}:
raise _HTTPError(http.NOT_FOUND)
info = self._read_encoded(request, _SCHEMAS["advise_corrupt_share"])
        # The reason is a free-form explanatory string, so it could
        # plausibly be longish.
info = await self._read_encoded(
request, _SCHEMAS["advise_corrupt_share"], max_size=32768
)
self._storage_server.advise_corrupt_share(
b"mutable", storage_index, share_number, info["reason"].encode("utf-8")
)

View File

@ -33,9 +33,13 @@ Ported to Python 3.
from __future__ import annotations
from six import ensure_text
from typing import Union
import re, time, hashlib
from typing import Union, Any
from os import urandom
import re
import time
import hashlib
from configparser import NoSectionError
import attr
@ -67,6 +71,12 @@ from allmydata.interfaces import (
IStorageServer,
IFoolscapStoragePlugin,
)
from allmydata.grid_manager import (
create_grid_manager_verifier,
)
from allmydata.crypto import (
ed25519,
)
from allmydata.util import log, base32, connection_status
from allmydata.util.assertutil import precondition
from allmydata.util.observer import ObserverList
@ -79,6 +89,7 @@ from allmydata.storage.http_client import (
ClientException as HTTPClientException, StorageClientMutables,
ReadVector, TestWriteVectors, WriteVector, TestVector, ClientException
)
from .node import _Config
ANONYMOUS_STORAGE_NURLS = "anonymous-storage-NURLs"
@ -112,9 +123,15 @@ class StorageClientConfig(object):
:ivar dict[unicode, dict[unicode, unicode]] storage_plugins: A mapping from
names of ``IFoolscapStoragePlugin`` configured in *tahoe.cfg* to the
respective configuration.
:ivar list[ed25519.VerifyKey] grid_manager_keys: with no keys in
this list, we'll upload to any storage server. Otherwise, we will
only upload to a storage-server that has a valid certificate
signed by at least one of these keys.
"""
preferred_peers = attr.ib(default=())
storage_plugins = attr.ib(default=attr.Factory(dict))
grid_manager_keys = attr.ib(default=attr.Factory(list))
@classmethod
def from_node_config(cls, config):
@ -146,9 +163,17 @@ class StorageClientConfig(object):
plugin_config = []
storage_plugins[plugin_name] = dict(plugin_config)
grid_manager_keys = []
for name, gm_key in config.enumerate_section('grid_managers').items():
grid_manager_keys.append(
ed25519.verifying_key_from_string(gm_key.encode("ascii"))
)
return cls(
preferred_peers,
storage_plugins,
grid_manager_keys,
)
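    # A hypothetical tahoe.cfg fragment that enumerate_section('grid_managers')
    # would pick up; the name "gm0" is illustrative and the key shown is the
    # test key used later in this diff:
    #
    #     [grid_managers]
    #     gm0 = pub-v0-cbq6hcf3pxcz6ouoafrbktmkixkeuywpcpbcomzd3lqbkq4nmfga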
@ -175,7 +200,7 @@ class StorageFarmBroker(service.MultiService):
self,
permute_peers,
tub_maker,
node_config,
node_config: _Config,
storage_client_config=None,
):
service.MultiService.__init__(self)
@ -194,9 +219,9 @@ class StorageFarmBroker(service.MultiService):
# own Reconnector, and will give us a RemoteReference when we ask
# them for it.
self.servers = BytesKeyDict()
self._static_server_ids = set() # ignore announcements for these
        self._static_server_ids: set[bytes] = set()  # ignore announcements for these
self.introducer_client = None
self._threshold_listeners = [] # tuples of (threshold, Deferred)
        self._threshold_listeners: list[tuple[float, defer.Deferred[Any]]] = []  # tuples of (threshold, Deferred)
self._connected_high_water_mark = 0
@log_call(action_type=u"storage-client:broker:set-static-servers")
@ -250,6 +275,16 @@ class StorageFarmBroker(service.MultiService):
in self.storage_client_config.storage_plugins.items()
})
@staticmethod
def _should_we_use_http(node_config: _Config, announcement: dict) -> bool:
"""
Given an announcement dictionary and config, return whether we should
connect to storage server over HTTP.
"""
return not node_config.get_config(
"client", "force_foolscap", default=True, boolean=True,
) and len(announcement.get(ANONYMOUS_STORAGE_NURLS, [])) > 0
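    # In tahoe.cfg terms, HTTP is chosen only when both conditions hold; a
    # sketch of the client-side setting (spelling taken from the get_config()
    # call above):
    #
    #     [client]
    #     force_foolscap = false
    #
    # and the announcement must also carry a non-empty
    # "anonymous-storage-NURLs" list, otherwise we fall back to Foolscap.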
@log_call(
action_type=u"storage-client:broker:make-storage-server",
include_args=["server_id"],
@ -269,10 +304,21 @@ class StorageFarmBroker(service.MultiService):
by the given announcement.
"""
assert isinstance(server_id, bytes)
if len(server["ann"].get(ANONYMOUS_STORAGE_NURLS, [])) > 0:
s = HTTPNativeStorageServer(server_id, server["ann"])
gm_verifier = create_grid_manager_verifier(
self.storage_client_config.grid_manager_keys,
server["ann"].get("grid-manager-certificates", []),
"pub-{}".format(str(server_id, "ascii")), # server_id is v0-<key> not pub-v0-key .. for reasons?
)
if self._should_we_use_http(self.node_config, server["ann"]):
s = HTTPNativeStorageServer(
server_id,
server["ann"],
grid_manager_verifier=gm_verifier,
)
s.on_status_changed(lambda _: self._got_connection())
return s
handler_overrides = server.get("connections", {})
s = NativeStorageServer(
server_id,
@ -281,6 +327,7 @@ class StorageFarmBroker(service.MultiService):
handler_overrides,
self.node_config,
self.storage_client_config,
gm_verifier,
)
s.on_status_changed(lambda _: self._got_connection())
return s
@ -429,11 +476,26 @@ class StorageFarmBroker(service.MultiService):
for dsc in list(self.servers.values()):
dsc.try_to_connect()
def get_servers_for_psi(self, peer_selection_index):
def get_servers_for_psi(self, peer_selection_index, for_upload=False):
"""
        :param for_upload: whether to exclude servers that are invalid
            according to Grid Manager processing. When True and any Grid
            Manager keys are configured, storage servers with invalid or
            missing certificates are excluded.
"""
# return a list of server objects (IServers)
assert self.permute_peers == True
connected_servers = self.get_connected_servers()
preferred_servers = frozenset(s for s in connected_servers if s.get_longname() in self.preferred_peers)
if for_upload:
# print("upload processing: {}".format([srv.upload_permitted() for srv in connected_servers]))
connected_servers = [
srv
for srv in connected_servers
if srv.upload_permitted()
]
def _permuted(server):
seed = server.get_permutation_seed()
is_unpreferred = server not in preferred_servers
@ -609,9 +671,10 @@ class _FoolscapStorage(object):
{"permutation-seed-base32": "...",
"nickname": "...",
"grid-manager-certificates": [..],
}
*nickname* is optional.
*nickname* and *grid-manager-certificates* are optional.
The furl will be a Unicode string on Python 3; on Python 2 it will be
either a native (bytes) string or a Unicode string.
@ -741,7 +804,8 @@ class NativeStorageServer(service.MultiService):
"application-version": "unknown: no get_version()",
})
def __init__(self, server_id, ann, tub_maker, handler_overrides, node_config, config=StorageClientConfig()):
def __init__(self, server_id, ann, tub_maker, handler_overrides, node_config, config=None,
grid_manager_verifier=None):
service.MultiService.__init__(self)
assert isinstance(server_id, bytes)
self._server_id = server_id
@ -749,6 +813,11 @@ class NativeStorageServer(service.MultiService):
self._tub_maker = tub_maker
self._handler_overrides = handler_overrides
if config is None:
config = StorageClientConfig()
self._grid_manager_verifier = grid_manager_verifier
self._storage = self._make_storage_system(node_config, config, ann)
self.last_connect_time = None
@ -759,6 +828,21 @@ class NativeStorageServer(service.MultiService):
self._trigger_cb = None
self._on_status_changed = ObserverList()
def upload_permitted(self):
"""
If our client is configured with Grid Manager public-keys, we will
only upload to storage servers that have a currently-valid
certificate signed by at least one of the Grid Managers we
accept.
:return: True if we should use this server for uploads, False
otherwise.
"""
# if we have no Grid Manager keys configured, choice is easy
if self._grid_manager_verifier is None:
return True
return self._grid_manager_verifier()
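    # A sketch of the verifier contract assumed above:
    # create_grid_manager_verifier() returns None when no Grid Manager keys
    # are configured, and otherwise a zero-argument callable returning True
    # iff some announced certificate currently validates. Hypothetical use:
    #
    #     verifier = create_grid_manager_verifier(keys, announced_certs, "pub-v0-...")
    #     acceptable = verifier is None or verifier()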
def _make_storage_system(self, node_config, config, ann):
"""
:param allmydata.node._Config node_config: The node configuration to pass
@ -945,13 +1029,14 @@ class HTTPNativeStorageServer(service.MultiService):
"connected".
"""
def __init__(self, server_id: bytes, announcement, reactor=reactor):
def __init__(self, server_id: bytes, announcement, reactor=reactor, grid_manager_verifier=None):
service.MultiService.__init__(self)
assert isinstance(server_id, bytes)
self._server_id = server_id
self.announcement = announcement
self._on_status_changed = ObserverList()
self._reactor = reactor
self._grid_manager_verifier = grid_manager_verifier
furl = announcement["anonymous-storage-FURL"].encode("utf-8")
(
self._nickname,
@ -1001,6 +1086,21 @@ class HTTPNativeStorageServer(service.MultiService):
"""
return self._on_status_changed.subscribe(status_changed)
def upload_permitted(self):
"""
If our client is configured with Grid Manager public-keys, we will
only upload to storage servers that have a currently-valid
certificate signed by at least one of the Grid Managers we
accept.
:return: True if we should use this server for uploads, False
otherwise.
"""
# if we have no Grid Manager keys configured, choice is easy
if self._grid_manager_verifier is None:
return True
return self._grid_manager_verifier()
# Special methods used by copy.copy() and copy.deepcopy(). When those are
# used in allmydata.immutable.filenode to copy CheckResults during
# repair, we want it to treat the IServer instances as singletons, and

View File

@ -1,210 +0,0 @@
"""
This module is only necessary on Python 2. Once Python 2 code is dropped, it
can be deleted.
"""
from future.utils import PY3
if PY3:
raise RuntimeError("Just use subprocess.Popen")
# This is necessary to pacify flake8 on Python 3, while we're still supporting
# Python 2.
from past.builtins import unicode
# -*- coding: utf-8 -*-
## Copyright (C) 2021 Valentin Lab
##
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions
## are met:
##
## 1. Redistributions of source code must retain the above copyright
## notice, this list of conditions and the following disclaimer.
##
## 2. Redistributions in binary form must reproduce the above
## copyright notice, this list of conditions and the following
## disclaimer in the documentation and/or other materials provided
## with the distribution.
##
## 3. Neither the name of the copyright holder nor the names of its
## contributors may be used to endorse or promote products derived
## from this software without specific prior written permission.
##
## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
## FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
## COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
## INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
## (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
## SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
## OF THE POSSIBILITY OF SUCH DAMAGE.
##
## issue: https://bugs.python.org/issue19264
# See allmydata/windows/fixups.py
import sys
assert sys.platform == "win32"
import os
import ctypes
import subprocess
import _subprocess
from ctypes import byref, windll, c_char_p, c_wchar_p, c_void_p, \
Structure, sizeof, c_wchar, WinError
from ctypes.wintypes import BYTE, WORD, LPWSTR, BOOL, DWORD, LPVOID, \
HANDLE
##
## Types
##
CREATE_UNICODE_ENVIRONMENT = 0x00000400
LPCTSTR = c_char_p
LPTSTR = c_wchar_p
LPSECURITY_ATTRIBUTES = c_void_p
LPBYTE = ctypes.POINTER(BYTE)
class STARTUPINFOW(Structure):
_fields_ = [
("cb", DWORD), ("lpReserved", LPWSTR),
("lpDesktop", LPWSTR), ("lpTitle", LPWSTR),
("dwX", DWORD), ("dwY", DWORD),
("dwXSize", DWORD), ("dwYSize", DWORD),
("dwXCountChars", DWORD), ("dwYCountChars", DWORD),
("dwFillAtrribute", DWORD), ("dwFlags", DWORD),
("wShowWindow", WORD), ("cbReserved2", WORD),
("lpReserved2", LPBYTE), ("hStdInput", HANDLE),
("hStdOutput", HANDLE), ("hStdError", HANDLE),
]
LPSTARTUPINFOW = ctypes.POINTER(STARTUPINFOW)
class PROCESS_INFORMATION(Structure):
_fields_ = [
("hProcess", HANDLE), ("hThread", HANDLE),
("dwProcessId", DWORD), ("dwThreadId", DWORD),
]
LPPROCESS_INFORMATION = ctypes.POINTER(PROCESS_INFORMATION)
class DUMMY_HANDLE(ctypes.c_void_p):
def __init__(self, *a, **kw):
super(DUMMY_HANDLE, self).__init__(*a, **kw)
self.closed = False
def Close(self):
if not self.closed:
windll.kernel32.CloseHandle(self)
self.closed = True
def __int__(self):
return self.value
CreateProcessW = windll.kernel32.CreateProcessW
CreateProcessW.argtypes = [
LPCTSTR, LPTSTR, LPSECURITY_ATTRIBUTES,
LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, LPCTSTR,
LPSTARTUPINFOW, LPPROCESS_INFORMATION,
]
CreateProcessW.restype = BOOL
##
## Patched functions/classes
##
def CreateProcess(executable, args, _p_attr, _t_attr,
inherit_handles, creation_flags, env, cwd,
startup_info):
"""Create a process supporting unicode executable and args for win32
Python implementation of CreateProcess using CreateProcessW for Win32
"""
si = STARTUPINFOW(
dwFlags=startup_info.dwFlags,
wShowWindow=startup_info.wShowWindow,
cb=sizeof(STARTUPINFOW),
## XXXvlab: not sure of the casting here to ints.
hStdInput=int(startup_info.hStdInput),
hStdOutput=int(startup_info.hStdOutput),
hStdError=int(startup_info.hStdError),
)
wenv = None
if env is not None:
## LPCWSTR seems to be c_wchar_p, so let's say CWSTR is c_wchar
env = (unicode("").join([
unicode("%s=%s\0") % (k, v)
for k, v in env.items()])) + unicode("\0")
wenv = (c_wchar * len(env))()
wenv.value = env
pi = PROCESS_INFORMATION()
creation_flags |= CREATE_UNICODE_ENVIRONMENT
if CreateProcessW(executable, args, None, None,
inherit_handles, creation_flags,
wenv, cwd, byref(si), byref(pi)):
return (DUMMY_HANDLE(pi.hProcess), DUMMY_HANDLE(pi.hThread),
pi.dwProcessId, pi.dwThreadId)
raise WinError()
class Popen(subprocess.Popen):
"""This superseeds Popen and corrects a bug in cPython 2.7 implem"""
def _execute_child(self, args, executable, preexec_fn, close_fds,
cwd, env, universal_newlines,
startupinfo, creationflags, shell, to_close,
p2cread, p2cwrite,
c2pread, c2pwrite,
errread, errwrite):
"""Code from part of _execute_child from Python 2.7 (9fbb65e)
There are only 2 little changes concerning the construction of
the the final string in shell mode: we preempt the creation of
the command string when shell is True, because original function
will try to encode unicode args which we want to avoid to be able to
sending it as-is to ``CreateProcess``.
"""
if not isinstance(args, subprocess.types.StringTypes):
args = subprocess.list2cmdline(args)
if startupinfo is None:
startupinfo = subprocess.STARTUPINFO()
if shell:
startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW
startupinfo.wShowWindow = _subprocess.SW_HIDE
comspec = os.environ.get("COMSPEC", unicode("cmd.exe"))
args = unicode('{} /c "{}"').format(comspec, args)
if (_subprocess.GetVersion() >= 0x80000000 or
os.path.basename(comspec).lower() == "command.com"):
w9xpopen = self._find_w9xpopen()
args = unicode('"%s" %s') % (w9xpopen, args)
creationflags |= _subprocess.CREATE_NEW_CONSOLE
cp = _subprocess.CreateProcess
_subprocess.CreateProcess = CreateProcess
try:
super(Popen, self)._execute_child(
args, executable,
preexec_fn, close_fds, cwd, env, universal_newlines,
startupinfo, creationflags, False, to_close, p2cread,
p2cwrite, c2pread, c2pwrite, errread, errwrite,
)
finally:
_subprocess.CreateProcess = cp

View File

@ -10,26 +10,33 @@ from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from six.moves import StringIO
# We're going to override stdin/stderr, so we want to match their behavior on the respective Python versions.
from io import StringIO
from twisted.python.usage import (
UsageError,
)
from twisted.python.filepath import (
FilePath,
)
from testtools.matchers import (
Contains,
)
from allmydata.scripts.admin import (
migrate_crawler,
add_grid_manager_cert,
)
from allmydata.scripts.runner import (
Options,
)
from allmydata.util import jsonbytes as json
from ..common import (
SyncTestCase,
)
class AdminMigrateCrawler(SyncTestCase):
"""
Tests related to 'tahoe admin migrate-crawler'
@ -85,3 +92,162 @@ class AdminMigrateCrawler(SyncTestCase):
str(options),
Contains("security issues with pickle")
)
fake_cert = {
"certificate": "{\"expires\":1601687822,\"public_key\":\"pub-v0-cbq6hcf3pxcz6ouoafrbktmkixkeuywpcpbcomzd3lqbkq4nmfga\",\"version\":1}",
"signature": "fvjd3uvvupf2v6tnvkwjd473u3m3inyqkwiclhp7balmchkmn3px5pei3qyfjnhymq4cjcwvbpqmcwwnwswdtrfkpnlaxuih2zbdmda"
}
class AddCertificateOptions(SyncTestCase):
"""
Tests for 'tahoe admin add-grid-manager-cert' option validation
"""
def setUp(self):
self.tahoe = Options()
return super(AddCertificateOptions, self).setUp()
def test_parse_no_data(self):
"""
        When no data is passed to stdin, an error is produced
"""
self.tahoe.stdin = StringIO("")
self.tahoe.stderr = StringIO() # suppress message
with self.assertRaises(UsageError) as ctx:
self.tahoe.parseOptions(
[
"admin", "add-grid-manager-cert",
"--name", "random-name",
"--filename", "-",
]
)
self.assertIn(
"Reading certificate from stdin failed",
str(ctx.exception)
)
def test_read_cert_file(self):
"""
A certificate can be read from a file
"""
tmp = self.mktemp()
with open(tmp, "wb") as f:
f.write(json.dumps_bytes(fake_cert))
# certificate should be loaded
self.tahoe.parseOptions(
[
"admin", "add-grid-manager-cert",
"--name", "random-name",
"--filename", tmp,
]
)
opts = self.tahoe.subOptions.subOptions
self.assertEqual(
fake_cert,
opts.certificate_data
)
def test_bad_certificate(self):
"""
Unparseable data produces an error
"""
self.tahoe.stdin = StringIO("{}")
self.tahoe.stderr = StringIO() # suppress message
with self.assertRaises(UsageError) as ctx:
self.tahoe.parseOptions(
[
"admin", "add-grid-manager-cert",
"--name", "random-name",
"--filename", "-",
]
)
self.assertIn(
"Grid Manager certificate must contain",
str(ctx.exception)
)
class AddCertificateCommand(SyncTestCase):
"""
Tests for 'tahoe admin add-grid-manager-cert' operation
"""
def setUp(self):
self.tahoe = Options()
self.node_path = FilePath(self.mktemp())
self.node_path.makedirs()
with self.node_path.child("tahoe.cfg").open("w") as f:
f.write(b"# minimal test config\n")
return super(AddCertificateCommand, self).setUp()
def test_add_one(self):
"""
Adding a certificate succeeds
"""
self.tahoe.stdin = StringIO(json.dumps(fake_cert))
self.tahoe.stderr = StringIO()
self.tahoe.parseOptions(
[
"--node-directory", self.node_path.path,
"admin", "add-grid-manager-cert",
"--name", "zero",
"--filename", "-",
]
)
self.tahoe.subOptions.subOptions.stdin = self.tahoe.stdin
self.tahoe.subOptions.subOptions.stderr = self.tahoe.stderr
rc = add_grid_manager_cert(self.tahoe.subOptions.subOptions)
self.assertEqual(rc, 0)
self.assertEqual(
{"zero.cert", "tahoe.cfg"},
set(self.node_path.listdir())
)
self.assertIn(
"There are now 1 certificates",
self.tahoe.stderr.getvalue()
)
def test_add_two(self):
"""
An error message is produced when adding a certificate with a
duplicate name.
"""
self.tahoe.stdin = StringIO(json.dumps(fake_cert))
self.tahoe.stderr = StringIO()
self.tahoe.parseOptions(
[
"--node-directory", self.node_path.path,
"admin", "add-grid-manager-cert",
"--name", "zero",
"--filename", "-",
]
)
self.tahoe.subOptions.subOptions.stdin = self.tahoe.stdin
self.tahoe.subOptions.subOptions.stderr = self.tahoe.stderr
rc = add_grid_manager_cert(self.tahoe.subOptions.subOptions)
self.assertEqual(rc, 0)
self.tahoe.stdin = StringIO(json.dumps(fake_cert))
self.tahoe.parseOptions(
[
"--node-directory", self.node_path.path,
"admin", "add-grid-manager-cert",
"--name", "zero",
"--filename", "-",
]
)
self.tahoe.subOptions.subOptions.stdin = self.tahoe.stdin
self.tahoe.subOptions.subOptions.stderr = self.tahoe.stderr
rc = add_grid_manager_cert(self.tahoe.subOptions.subOptions)
self.assertEqual(rc, 1)
self.assertIn(
"Already have certificate for 'zero'",
self.tahoe.stderr.getvalue()
)

View File

@ -0,0 +1,314 @@
"""
Tests for the grid manager CLI.
"""
import os
from io import (
BytesIO,
)
from unittest import (
skipIf,
)
from twisted.trial.unittest import (
TestCase,
)
from allmydata.cli.grid_manager import (
grid_manager,
)
import click.testing
# these imports support the tests for `tahoe *` subcommands
from ..common_util import (
run_cli,
)
from twisted.internet.defer import (
inlineCallbacks,
)
from twisted.python.filepath import (
FilePath,
)
from twisted.python.runtime import (
platform,
)
from allmydata.util import jsonbytes as json
class GridManagerCommandLine(TestCase):
"""
Test the mechanics of the `grid-manager` command
"""
def setUp(self):
self.runner = click.testing.CliRunner()
super(GridManagerCommandLine, self).setUp()
def invoke_and_check(self, *args, **kwargs):
"""Invoke a command with the runner and ensure it succeeded."""
result = self.runner.invoke(*args, **kwargs)
if result.exception is not None:
raise result.exc_info[1].with_traceback(result.exc_info[2])
self.assertEqual(result.exit_code, 0, result)
return result
def test_create(self):
"""
Create a new grid-manager
"""
with self.runner.isolated_filesystem():
result = self.invoke_and_check(grid_manager, ["--config", "foo", "create"])
self.assertEqual(["foo"], os.listdir("."))
self.assertEqual(["config.json"], os.listdir("./foo"))
result = self.invoke_and_check(grid_manager, ["--config", "foo", "public-identity"])
self.assertTrue(result.output.startswith("pub-v0-"))
def test_load_invalid(self):
"""
An invalid config is reported to the user
"""
with self.runner.isolated_filesystem():
with open("config.json", "wb") as f:
f.write(json.dumps_bytes({"not": "valid"}))
result = self.runner.invoke(grid_manager, ["--config", ".", "public-identity"])
self.assertNotEqual(result.exit_code, 0)
self.assertIn(
"Error loading Grid Manager",
result.output,
)
def test_create_already(self):
"""
It's an error to create a new grid-manager in an existing
directory.
"""
with self.runner.isolated_filesystem():
result = self.invoke_and_check(grid_manager, ["--config", "foo", "create"])
result = self.runner.invoke(grid_manager, ["--config", "foo", "create"])
self.assertEqual(1, result.exit_code)
self.assertIn(
"Can't create",
result.stdout,
)
def test_create_stdout(self):
"""
Create a new grid-manager with no files
"""
with self.runner.isolated_filesystem():
result = self.invoke_and_check(grid_manager, ["--config", "-", "create"])
self.assertEqual([], os.listdir("."))
config = json.loads(result.output)
self.assertEqual(
{"private_key", "grid_manager_config_version"},
set(config.keys()),
)
def test_list_stdout(self):
"""
Load Grid Manager without files (using 'list' subcommand, but any will do)
"""
config = {
"storage_servers": {
"storage0": {
"public_key": "pub-v0-cbq6hcf3pxcz6ouoafrbktmkixkeuywpcpbcomzd3lqbkq4nmfga"
}
},
"private_key": "priv-v0-6uinzyaxy3zvscwgsps5pxcfezhrkfb43kvnrbrhhfzyduyqnniq",
"grid_manager_config_version": 0
}
result = self.invoke_and_check(
grid_manager, ["--config", "-", "list"],
input=BytesIO(json.dumps_bytes(config)),
)
self.assertEqual(result.exit_code, 0)
self.assertEqual(
"storage0: pub-v0-cbq6hcf3pxcz6ouoafrbktmkixkeuywpcpbcomzd3lqbkq4nmfga\n",
result.output,
)
def test_add_and_sign(self):
"""
Add a new storage-server and sign a certificate for it
"""
pubkey = "pub-v0-cbq6hcf3pxcz6ouoafrbktmkixkeuywpcpbcomzd3lqbkq4nmfga"
with self.runner.isolated_filesystem():
self.invoke_and_check(grid_manager, ["--config", "foo", "create"])
self.invoke_and_check(grid_manager, ["--config", "foo", "add", "storage0", pubkey])
result = self.invoke_and_check(grid_manager, ["--config", "foo", "sign", "storage0", "10"])
sigcert = json.loads(result.output)
self.assertEqual({"certificate", "signature"}, set(sigcert.keys()))
cert = json.loads(sigcert['certificate'])
self.assertEqual(cert["public_key"], pubkey)
def test_add_and_sign_second_cert(self):
"""
Add a new storage-server and sign two certificates.
"""
pubkey = "pub-v0-cbq6hcf3pxcz6ouoafrbktmkixkeuywpcpbcomzd3lqbkq4nmfga"
with self.runner.isolated_filesystem():
self.invoke_and_check(grid_manager, ["--config", "foo", "create"])
self.invoke_and_check(grid_manager, ["--config", "foo", "add", "storage0", pubkey])
self.invoke_and_check(grid_manager, ["--config", "foo", "sign", "storage0", "10"])
self.invoke_and_check(grid_manager, ["--config", "foo", "sign", "storage0", "10"])
# we should now have two certificates stored
self.assertEqual(
set(FilePath("foo").listdir()),
{'storage0.cert.1', 'storage0.cert.0', 'config.json'},
)
def test_add_twice(self):
"""
An error is reported trying to add an existing server
"""
pubkey0 = "pub-v0-cbq6hcf3pxcz6ouoafrbktmkixkeuywpcpbcomzd3lqbkq4nmfga"
pubkey1 = "pub-v0-5ysc55trfvfvg466v46j4zmfyltgus3y2gdejifctv7h4zkuyveq"
with self.runner.isolated_filesystem():
self.invoke_and_check(grid_manager, ["--config", "foo", "create"])
self.invoke_and_check(grid_manager, ["--config", "foo", "add", "storage0", pubkey0])
result = self.runner.invoke(grid_manager, ["--config", "foo", "add", "storage0", pubkey1])
            self.assertNotEqual(result.exit_code, 0)
self.assertIn(
"A storage-server called 'storage0' already exists",
result.output,
)
def test_add_list_remove(self):
"""
Add a storage server, list it, remove it.
"""
pubkey = "pub-v0-cbq6hcf3pxcz6ouoafrbktmkixkeuywpcpbcomzd3lqbkq4nmfga"
with self.runner.isolated_filesystem():
self.invoke_and_check(grid_manager, ["--config", "foo", "create"])
self.invoke_and_check(grid_manager, ["--config", "foo", "add", "storage0", pubkey])
self.invoke_and_check(grid_manager, ["--config", "foo", "sign", "storage0", "1"])
result = self.invoke_and_check(grid_manager, ["--config", "foo", "list"])
names = [
line.split(':')[0]
for line in result.output.strip().split('\n')
if not line.startswith(" ") # "cert" lines start with whitespace
]
self.assertEqual(names, ["storage0"])
self.invoke_and_check(grid_manager, ["--config", "foo", "remove", "storage0"])
result = self.invoke_and_check(grid_manager, ["--config", "foo", "list"])
self.assertEqual(result.output.strip(), "")
def test_remove_missing(self):
"""
        Error reported when removing a non-existent server
"""
with self.runner.isolated_filesystem():
self.invoke_and_check(grid_manager, ["--config", "foo", "create"])
result = self.runner.invoke(grid_manager, ["--config", "foo", "remove", "storage0"])
            self.assertNotEqual(result.exit_code, 0)
self.assertIn(
"No storage-server called 'storage0' exists",
result.output,
)
def test_sign_missing(self):
"""
        Error reported when signing a non-existent server
"""
with self.runner.isolated_filesystem():
self.invoke_and_check(grid_manager, ["--config", "foo", "create"])
result = self.runner.invoke(grid_manager, ["--config", "foo", "sign", "storage0", "42"])
            self.assertNotEqual(result.exit_code, 0)
self.assertIn(
"No storage-server called 'storage0' exists",
result.output,
)
@skipIf(not platform.isLinux(), "I only know how permissions work on linux")
def test_sign_bad_perms(self):
"""
        Error reported if we can't create the certificate file
"""
pubkey = "pub-v0-cbq6hcf3pxcz6ouoafrbktmkixkeuywpcpbcomzd3lqbkq4nmfga"
with self.runner.isolated_filesystem():
self.invoke_and_check(grid_manager, ["--config", "foo", "create"])
self.invoke_and_check(grid_manager, ["--config", "foo", "add", "storage0", pubkey])
# make the directory un-writable (so we can't create a new cert)
os.chmod("foo", 0o550)
result = self.runner.invoke(grid_manager, ["--config", "foo", "sign", "storage0", "42"])
            self.assertEqual(result.exit_code, 1)
self.assertIn(
"Permission denied",
result.output,
)
class TahoeAddGridManagerCert(TestCase):
"""
Test `tahoe admin add-grid-manager-cert` subcommand
"""
@inlineCallbacks
def test_help(self):
"""
some kind of help is printed
"""
code, out, err = yield run_cli("admin", "add-grid-manager-cert")
self.assertEqual(err, "")
self.assertNotEqual(0, code)
@inlineCallbacks
def test_no_name(self):
"""
        It is an error to omit the --name option
"""
code, out, err = yield run_cli(
"admin", "add-grid-manager-cert", "--filename", "-",
stdin=b"the cert",
)
self.assertIn(
"Must provide --name",
out
)
@inlineCallbacks
def test_no_filename(self):
"""
        It is an error to omit the --filename option
"""
code, out, err = yield run_cli(
"admin", "add-grid-manager-cert", "--name", "foo",
stdin=b"the cert",
)
self.assertIn(
"Must provide --filename",
out
)
@inlineCallbacks
def test_add_one(self):
"""
we can add a certificate
"""
nodedir = self.mktemp()
fake_cert = b"""{"certificate": "", "signature": ""}"""
code, out, err = yield run_cli(
"--node-directory", nodedir,
"admin", "add-grid-manager-cert", "-f", "-", "--name", "foo",
stdin=fake_cert,
ignore_stderr=True,
)
nodepath = FilePath(nodedir)
with nodepath.child("tahoe.cfg").open("r") as f:
config_data = f.read()
self.assertIn("tahoe.cfg", nodepath.listdir())
self.assertIn(
b"foo = foo.cert",
config_data,
)
self.assertIn("foo.cert", nodepath.listdir())
with nodepath.child("foo.cert").open("r") as f:
self.assertEqual(
json.load(f),
json.loads(fake_cert)
)
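    # A hypothetical view of the resulting tahoe.cfg (the section name is an
    # assumption; the `foo = foo.cert` line is what the test asserts):
    #
    #     [grid_manager_certificates]
    #     foo = foo.cert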

View File

@ -1,19 +1,18 @@
"""
Ported to Python 3.
Tests for the ``tahoe put`` CLI tool.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from future.utils import PY2
if PY2:
from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
from __future__ import annotations
from typing import Callable, Awaitable, TypeVar, Any
import os.path
from twisted.trial import unittest
from twisted.python import usage
from twisted.python.filepath import FilePath
from cryptography.hazmat.primitives.serialization import load_pem_private_key
from allmydata.crypto.rsa import PrivateKey
from allmydata.uri import from_string
from allmydata.util import fileutil
from allmydata.scripts.common import get_aliases
from allmydata.scripts import cli
@ -22,6 +21,9 @@ from ..common_util import skip_if_cannot_represent_filename
from allmydata.util.encodingutil import get_io_encoding
from allmydata.util.fileutil import abspath_expanduser_unicode
from .common import CLITestMixin
from allmydata.mutable.common import derive_mutable_keys
T = TypeVar("T")
class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
@ -215,6 +217,65 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
return d
async def test_unlinked_mutable_specified_private_key(self) -> None:
"""
A new unlinked mutable can be created using a specified private
key.
"""
self.basedir = "cli/Put/unlinked-mutable-with-key"
await self._test_mutable_specified_key(
lambda do_cli, pempath, datapath: do_cli(
"put", "--mutable", "--private-key-path", pempath.path,
stdin=datapath.getContent(),
),
)
async def test_linked_mutable_specified_private_key(self) -> None:
"""
A new linked mutable can be created using a specified private key.
"""
self.basedir = "cli/Put/linked-mutable-with-key"
await self._test_mutable_specified_key(
lambda do_cli, pempath, datapath: do_cli(
"put", "--mutable", "--private-key-path", pempath.path, datapath.path,
),
)
async def _test_mutable_specified_key(
self,
run: Callable[[Any, FilePath, FilePath], Awaitable[tuple[int, bytes, bytes]]],
) -> None:
"""
A helper for testing mutable creation.
:param run: A function to do the creation. It is called with
``self.do_cli`` and the path to a private key PEM file and a data
file. It returns whatever ``do_cli`` returns.
"""
self.set_up_grid(oneshare=True)
pempath = FilePath(__file__).parent().sibling("data").child("openssl-rsa-2048.txt")
datapath = FilePath(self.basedir).child("data")
datapath.setContent(b"Hello world" * 1024)
(rc, out, err) = await run(self.do_cli, pempath, datapath)
self.assertEqual(rc, 0, (out, err))
cap = from_string(out.strip())
# The capability is derived from the key we specified.
privkey = load_pem_private_key(pempath.getContent(), password=None)
assert isinstance(privkey, PrivateKey)
pubkey = privkey.public_key()
writekey, _, fingerprint = derive_mutable_keys((pubkey, privkey))
self.assertEqual(
(writekey, fingerprint),
(cap.writekey, cap.fingerprint),
)
# Also the capability we were given actually refers to the data we
# uploaded.
(rc, out, err) = await self.do_cli("get", out.strip())
self.assertEqual(rc, 0, (out, err))
self.assertEqual(out, datapath.getContent().decode("ascii"))
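    # A hedged sketch of the derivation relied on above: the write key and
    # fingerprint are deterministic functions of the RSA keypair, so a cap
    # made from a known PEM can be recomputed offline (create_signing_keypair
    # lives in allmydata.crypto.rsa; treat the exact helper as an assumption):
    #
    #     from allmydata.crypto.rsa import create_signing_keypair
    #     from allmydata.mutable.common import derive_mutable_keys
    #
    #     privkey, pubkey = create_signing_keypair(2048)
    #     writekey, encprivkey, fingerprint = derive_mutable_keys((pubkey, privkey))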
def test_mutable(self):
# echo DATA1 | tahoe put --mutable - uploaded.txt
# echo DATA2 | tahoe put - uploaded.txt # should modify-in-place

View File

@ -34,7 +34,7 @@ from __future__ import annotations
from typing import Iterator, Optional, List, Tuple
from collections.abc import Awaitable
from inspect import getargspec
from inspect import getfullargspec
from itertools import count
from sys import stderr
@ -141,8 +141,8 @@ def _verify():
"""
# Poor man's interface verification.
a = getargspec(create)
b = getargspec(MemoryWormholeServer.create)
a = getfullargspec(create)
b = getfullargspec(MemoryWormholeServer.create)
# I know it has a `self` argument at the beginning. That's okay.
b = b._replace(args=b.args[1:])
assert a == b, "{} != {}".format(a, b)

Some files were not shown because too many files have changed in this diff