diff --git a/.circleci/config.yml b/.circleci/config.yml index 29b55ad5f..42e47957c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -29,7 +29,7 @@ workflows: - "debian-9": &DOCKERHUB_CONTEXT context: "dockerhub-auth" - - "debian-8": + - "debian-10": <<: *DOCKERHUB_CONTEXT requires: - "debian-9" @@ -107,7 +107,7 @@ workflows: - "master" jobs: - - "build-image-debian-8": + - "build-image-debian-10": <<: *DOCKERHUB_CONTEXT - "build-image-debian-9": <<: *DOCKERHUB_CONTEXT @@ -277,11 +277,11 @@ jobs: fi - debian-8: + debian-10: <<: *DEBIAN docker: - <<: *DOCKERHUB_AUTH - image: "tahoelafsci/debian:8-py2.7" + image: "tahoelafsci/debian:10-py2.7" user: "nobody" @@ -529,12 +529,12 @@ jobs: docker push tahoelafsci/${DISTRO}:${TAG}-py${PYTHON_VERSION} - build-image-debian-8: + build-image-debian-10: <<: *BUILD_IMAGE environment: DISTRO: "debian" - TAG: "8" + TAG: "10" PYTHON_VERSION: "2.7" diff --git a/newsfragments/3326.installation b/newsfragments/3326.installation new file mode 100644 index 000000000..2a3a64e32 --- /dev/null +++ b/newsfragments/3326.installation @@ -0,0 +1 @@ +Debian 8 support has been replaced with Debian 10 support. diff --git a/newsfragments/3588.minor b/newsfragments/3588.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3592.minor b/newsfragments/3592.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3593.minor b/newsfragments/3593.minor new file mode 100644 index 000000000..e69de29bb diff --git a/src/allmydata/test/_win_subprocess.py b/src/allmydata/test/_win_subprocess.py new file mode 100644 index 000000000..fe6960c73 --- /dev/null +++ b/src/allmydata/test/_win_subprocess.py @@ -0,0 +1,197 @@ +# -*- coding: utf-8 -*- + +## Copyright (C) 2021 Valentin Lab +## +## Redistribution and use in source and binary forms, with or without +## modification, are permitted provided that the following conditions +## are met: +## +## 1. 
Redistributions of source code must retain the above copyright +## notice, this list of conditions and the following disclaimer. +## +## 2. Redistributions in binary form must reproduce the above +## copyright notice, this list of conditions and the following +## disclaimer in the documentation and/or other materials provided +## with the distribution. +## +## 3. Neither the name of the copyright holder nor the names of its +## contributors may be used to endorse or promote products derived +## from this software without specific prior written permission. +## +## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +## LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +## FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +## COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +## INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +## (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +## SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +## STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +## ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +## OF THE POSSIBILITY OF SUCH DAMAGE. 
+## + +## issue: https://bugs.python.org/issue19264 + +# See allmydata/windows/fixups.py +import sys +assert sys.platform == "win32" + +import os +import ctypes +import subprocess +import _subprocess +from ctypes import byref, windll, c_char_p, c_wchar_p, c_void_p, \ + Structure, sizeof, c_wchar, WinError +from ctypes.wintypes import BYTE, WORD, LPWSTR, BOOL, DWORD, LPVOID, \ + HANDLE + + +## +## Types +## + +CREATE_UNICODE_ENVIRONMENT = 0x00000400 +LPCTSTR = c_char_p +LPTSTR = c_wchar_p +LPSECURITY_ATTRIBUTES = c_void_p +LPBYTE = ctypes.POINTER(BYTE) + +class STARTUPINFOW(Structure): + _fields_ = [ + ("cb", DWORD), ("lpReserved", LPWSTR), + ("lpDesktop", LPWSTR), ("lpTitle", LPWSTR), + ("dwX", DWORD), ("dwY", DWORD), + ("dwXSize", DWORD), ("dwYSize", DWORD), + ("dwXCountChars", DWORD), ("dwYCountChars", DWORD), + ("dwFillAtrribute", DWORD), ("dwFlags", DWORD), + ("wShowWindow", WORD), ("cbReserved2", WORD), + ("lpReserved2", LPBYTE), ("hStdInput", HANDLE), + ("hStdOutput", HANDLE), ("hStdError", HANDLE), + ] + +LPSTARTUPINFOW = ctypes.POINTER(STARTUPINFOW) + + +class PROCESS_INFORMATION(Structure): + _fields_ = [ + ("hProcess", HANDLE), ("hThread", HANDLE), + ("dwProcessId", DWORD), ("dwThreadId", DWORD), + ] + +LPPROCESS_INFORMATION = ctypes.POINTER(PROCESS_INFORMATION) + + +class DUMMY_HANDLE(ctypes.c_void_p): + + def __init__(self, *a, **kw): + super(DUMMY_HANDLE, self).__init__(*a, **kw) + self.closed = False + + def Close(self): + if not self.closed: + windll.kernel32.CloseHandle(self) + self.closed = True + + def __int__(self): + return self.value + + +CreateProcessW = windll.kernel32.CreateProcessW +CreateProcessW.argtypes = [ + LPCTSTR, LPTSTR, LPSECURITY_ATTRIBUTES, + LPSECURITY_ATTRIBUTES, BOOL, DWORD, LPVOID, LPCTSTR, + LPSTARTUPINFOW, LPPROCESS_INFORMATION, +] +CreateProcessW.restype = BOOL + + +## +## Patched functions/classes +## + +def CreateProcess(executable, args, _p_attr, _t_attr, + inherit_handles, creation_flags, env, cwd, + startup_info): + 
"""Create a process supporting unicode executable and args for win32 + + Python implementation of CreateProcess using CreateProcessW for Win32 + + """ + + si = STARTUPINFOW( + dwFlags=startup_info.dwFlags, + wShowWindow=startup_info.wShowWindow, + cb=sizeof(STARTUPINFOW), + ## Std handles stay None unless all three streams are redirected; only cast real handles to int. + hStdInput=None if startup_info.hStdInput is None else int(startup_info.hStdInput), + hStdOutput=None if startup_info.hStdOutput is None else int(startup_info.hStdOutput), + hStdError=None if startup_info.hStdError is None else int(startup_info.hStdError), + ) + + wenv = None + if env is not None: + ## LPCWSTR seems to be c_wchar_p, so let's say CWSTR is c_wchar + env = (unicode("").join([ + unicode("%s=%s\0") % (k, v) + for k, v in env.items()])) + unicode("\0") + wenv = (c_wchar * len(env))() + wenv.value = env + + pi = PROCESS_INFORMATION() + creation_flags |= CREATE_UNICODE_ENVIRONMENT + + if CreateProcessW(executable, args, None, None, + inherit_handles, creation_flags, + wenv, cwd, byref(si), byref(pi)): + return (DUMMY_HANDLE(pi.hProcess), DUMMY_HANDLE(pi.hThread), + pi.dwProcessId, pi.dwThreadId) + raise WinError() + + +class Popen(subprocess.Popen): + """This supersedes Popen and corrects a bug in the CPython 2.7 implementation""" + + def _execute_child(self, args, executable, preexec_fn, close_fds, + cwd, env, universal_newlines, + startupinfo, creationflags, shell, to_close, + p2cread, p2cwrite, + c2pread, c2pwrite, + errread, errwrite): + """Code from part of _execute_child from Python 2.7 (9fbb65e) + + There are only 2 little changes concerning the construction of + the final string in shell mode: we preempt the creation of + the command string when shell is True, because original function + will try to encode unicode args which we want to avoid to be able to + send it as-is to ``CreateProcess``. 
+ + """ + if not isinstance(args, subprocess.types.StringTypes): + args = subprocess.list2cmdline(args) + + if startupinfo is None: + startupinfo = subprocess.STARTUPINFO() + if shell: + startupinfo.dwFlags |= _subprocess.STARTF_USESHOWWINDOW + startupinfo.wShowWindow = _subprocess.SW_HIDE + comspec = os.environ.get("COMSPEC", unicode("cmd.exe")) + args = unicode('{} /c "{}"').format(comspec, args) + if (_subprocess.GetVersion() >= 0x80000000 or + os.path.basename(comspec).lower() == "command.com"): + w9xpopen = self._find_w9xpopen() + args = unicode('"%s" %s') % (w9xpopen, args) + creationflags |= _subprocess.CREATE_NEW_CONSOLE + + cp = _subprocess.CreateProcess + _subprocess.CreateProcess = CreateProcess + try: + super(Popen, self)._execute_child( + args, executable, + preexec_fn, close_fds, cwd, env, universal_newlines, + startupinfo, creationflags, False, to_close, p2cread, + p2cwrite, c2pread, c2pwrite, errread, errwrite, + ) + finally: + _subprocess.CreateProcess = cp diff --git a/src/allmydata/test/test_hashutil.py b/src/allmydata/test/test_hashutil.py index 6ec861c9f..482e79c0b 100644 --- a/src/allmydata/test/test_hashutil.py +++ b/src/allmydata/test/test_hashutil.py @@ -126,6 +126,42 @@ class HashUtilTests(unittest.TestCase): base32.a2b(b"2ckv3dfzh6rgjis6ogfqhyxnzy"), ) + def test_convergence_hasher_tag(self): + """ + ``_convergence_hasher_tag`` constructs the convergence hasher tag from a + unique prefix, the required, total, and segment size parameters, and a + convergence secret. + """ + self.assertEqual( + b"allmydata_immutable_content_to_key_with_added_secret_v1+" + b"16:\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42\x42," + b"9:3,10,1024,", + hashutil._convergence_hasher_tag( + k=3, + n=10, + segsize=1024, + convergence=b"\x42" * 16, + ), + ) + + def test_convergence_hasher_out_of_bounds(self): + """ + ``_convergence_hasher_tag`` raises ``ValueError`` if k or n is not between + 1 and 256 inclusive or if k is greater than n. 
+ """ + segsize = 1024 + secret = b"\x42" * 16 + for bad_k in (0, 2, 257): + with self.assertRaises(ValueError): + hashutil._convergence_hasher_tag( + k=bad_k, n=1, segsize=segsize, convergence=secret, + ) + for bad_n in (0, 1, 257): + with self.assertRaises(ValueError): + hashutil._convergence_hasher_tag( + k=2, n=bad_n, segsize=segsize, convergence=secret, + ) + def test_known_answers(self): """ Verify backwards compatibility by comparing hash outputs for some diff --git a/src/allmydata/test/test_python2_regressions.py b/src/allmydata/test/test_python2_regressions.py index 5c6a654c1..fc9ebe17a 100644 --- a/src/allmydata/test/test_python2_regressions.py +++ b/src/allmydata/test/test_python2_regressions.py @@ -14,9 +14,11 @@ from testtools.matchers import ( ) BLACKLIST = { - "allmydata.test.check_load", - "allmydata.windows.registry", "allmydata.scripts.types_", + "allmydata.test.check_load", + "allmydata.test._win_subprocess", + "allmydata.windows.registry", + "allmydata.windows.fixups", } diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py index 07ede2074..fc9bfd697 100644 --- a/src/allmydata/test/test_upload.py +++ b/src/allmydata/test/test_upload.py @@ -888,6 +888,34 @@ def is_happy_enough(servertoshnums, h, k): return True +class FileHandleTests(unittest.TestCase): + """ + Tests for ``FileHandle``. + """ + def test_get_encryption_key_convergent(self): + """ + When ``FileHandle`` is initialized with a convergence secret, + ``FileHandle.get_encryption_key`` returns a deterministic result that + is a function of that secret. + """ + secret = b"\x42" * 16 + handle = upload.FileHandle(BytesIO(b"hello world"), secret) + handle.set_default_encoding_parameters({ + "k": 3, + "happy": 5, + "n": 10, + # Remember this is the *max* segment size. In reality, the data + # size is much smaller so the actual segment size incorporated + # into the encryption key is also smaller. 
+ "max_segment_size": 128 * 1024, + }) + + self.assertEqual( + b64encode(self.successResultOf(handle.get_encryption_key())), + b"oBcuR/wKdCgCV2GKKXqiNg==", + ) + + class EncodingParameters(GridTestMixin, unittest.TestCase, SetDEPMixin, ShouldFailMixin): diff --git a/src/allmydata/test/test_windows.py b/src/allmydata/test/test_windows.py new file mode 100644 index 000000000..f2c1318c5 --- /dev/null +++ b/src/allmydata/test/test_windows.py @@ -0,0 +1,235 @@ +# -*- coding: utf-8 -*- +# Tahoe-LAFS -- secure, distributed storage grid +# +# Copyright © 2020 The Tahoe-LAFS Software Foundation +# +# This file is part of Tahoe-LAFS. +# +# See the docs/about.rst file for licensing information. + +""" +Tests for the ``allmydata.windows``. +""" + +from __future__ import division +from __future__ import absolute_import +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + +from sys import ( + executable, +) +from json import ( + load, +) +from textwrap import ( + dedent, +) +from subprocess import ( + PIPE, + Popen, +) + +from twisted.python.filepath import ( + FilePath, +) +from twisted.python.runtime import ( + platform, +) + +from testtools import ( + skipUnless, +) + +from testtools.matchers import ( + MatchesAll, + AllMatch, + IsInstance, + Equals, +) + +from hypothesis import ( + HealthCheck, + settings, + given, + note, +) + +from hypothesis.strategies import ( + lists, + text, + characters, +) + +from .common import ( + SyncTestCase, +) + +slow_settings = settings( + suppress_health_check=[HealthCheck.too_slow], + deadline=None, + + # Reduce the number of examples required to consider the test a success. + # The default is 100. Launching a process is expensive so we'll try to do + # it as few times as we can get away with. 
To maintain good coverage, + # we'll try to pass as much data to each process as we can so we're still + # covering a good portion of the space. + max_examples=10, +) + +@skipUnless(platform.isWindows(), "get_argv is Windows-only") +class GetArgvTests(SyncTestCase): + """ + Tests for ``get_argv``. + """ + def test_get_argv_return_type(self): + """ + ``get_argv`` returns a list of unicode strings + """ + # Hide the ``allmydata.windows.fixups.get_argv`` import here so it + # doesn't cause failures on non-Windows platforms. + from ..windows.fixups import ( + get_argv, + ) + argv = get_argv() + + # We don't know what this process's command line was so we just make + # structural assertions here. + self.assertThat( + argv, + MatchesAll( + IsInstance(list), + AllMatch(IsInstance(str)), + ), + ) + + # This test runs a child process. This is unavoidably slow and variable. + # Disable the two time-based Hypothesis health checks. + @slow_settings + @given( + lists( + text( + alphabet=characters( + blacklist_categories=('Cs',), + # Windows CommandLine is a null-terminated string, + # analogous to POSIX exec* arguments. So exclude nul from + # our generated arguments. + blacklist_characters=('\x00',), + ), + min_size=10, + max_size=20, + ), + min_size=10, + max_size=20, + ), + ) + def test_argv_values(self, argv): + """ + ``get_argv`` returns a list representing the result of tokenizing the + "command line" argument string provided to Windows processes. + """ + # Python 2.7 doesn't have good options for launching a process with + # non-ASCII in its command line. So use this alternative that does a + # better job. Bury the import here because it only works on Windows. + from ._win_subprocess import ( + Popen + ) + + working_path = FilePath(self.mktemp()) + working_path.makedirs() + save_argv_path = working_path.child("script.py") + saved_argv_path = working_path.child("data.json") + with open(save_argv_path.path, "wt") as f: + # A simple program to save argv to a file. 
Using the file saves + # us having to figure out how to reliably get non-ASCII back over + # stdio which may pose an independent set of challenges. At least + # file I/O is relatively simple and well-understood. + f.write(dedent( + """ + from allmydata.windows.fixups import ( + get_argv, + ) + import json + with open({!r}, "wt") as f: + f.write(json.dumps(get_argv())) + """.format(saved_argv_path.path)), + ) + argv = [executable.decode("utf-8"), save_argv_path.path] + argv + p = Popen(argv, stdin=PIPE, stdout=PIPE, stderr=PIPE) + p.stdin.close() + stdout = p.stdout.read() + stderr = p.stderr.read() + returncode = p.wait() + + note("stdout: {!r}".format(stdout)) + note("stderr: {!r}".format(stderr)) + + self.assertThat( + returncode, + Equals(0), + ) + with open(saved_argv_path.path, "rt") as f: + saved_argv = load(f) + + self.assertThat( + saved_argv, + Equals(argv), + ) + + +@skipUnless(platform.isWindows(), "intended for Windows-only codepaths") +class UnicodeOutputTests(SyncTestCase): + """ + Tests for writing unicode to stdout and stderr. + """ + @slow_settings + @given(characters(), characters()) + def test_write_non_ascii(self, stdout_char, stderr_char): + """ + Non-ASCII unicode characters can be written to stdout and stderr with + automatic UTF-8 encoding. + """ + working_path = FilePath(self.mktemp()) + working_path.makedirs() + script = working_path.child("script.py") + script.setContent(dedent( + """ + from future.utils import PY2 + if PY2: + from future.builtins import chr + + from allmydata.windows.fixups import initialize + initialize() + + # XXX A shortcoming of the monkey-patch approach is that you'd + # better not import stdout or stderr before you call initialize. 
+ from sys import argv, stdout, stderr + + stdout.write(chr(int(argv[1]))) + stdout.close() + stderr.write(chr(int(argv[2]))) + stderr.close() + """ + )) + p = Popen([ + executable, + script.path, + str(ord(stdout_char)), + str(ord(stderr_char)), + ], stdout=PIPE, stderr=PIPE) + stdout = p.stdout.read().decode("utf-8").replace("\r\n", "\n") + stderr = p.stderr.read().decode("utf-8").replace("\r\n", "\n") + returncode = p.wait() + + self.assertThat( + (stdout, stderr, returncode), + Equals(( + stdout_char, + stderr_char, + 0, + )), + ) diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index ebb2f12af..8525dd95e 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -176,10 +176,44 @@ def convergence_hash(k, n, segsize, data, convergence): return h.digest() -def convergence_hasher(k, n, segsize, convergence): +def _convergence_hasher_tag(k, n, segsize, convergence): + """ + Create the convergence hashing tag. + + :param int k: Required shares (in [1..256]). + :param int n: Total shares (in [1..256]). + :param int segsize: Maximum segment size. + :param bytes convergence: The convergence secret. + + :return bytes: The bytestring to use as a tag in the convergence hash. + """ assert isinstance(convergence, bytes) + if k > n: + raise ValueError( + "k > n not allowed; k = {}, n = {}".format(k, n), + ) + if k < 1 or n < 1: + # It doesn't make sense to have zero shares. Zero shares carry no + # information, cannot encode any part of the application data. + raise ValueError( + "k, n < 1 not allowed; k = {}, n = {}".format(k, n), + ) + if k > 256 or n > 256: + # ZFEC supports encoding application data into a maximum of 256 + # shares. If we ignore the limitations of ZFEC, it may be fine to use + # a configuration with more shares than that and it may be fine to + # construct a convergence tag from such a configuration. Since ZFEC + # is the only supported encoder, though, this is moot for now. 
+ raise ValueError( + "k, n > 256 not allowed; k = {}, n = {}".format(k, n), + ) param_tag = netstring(b"%d,%d,%d" % (k, n, segsize)) tag = CONVERGENT_ENCRYPTION_TAG + netstring(convergence) + param_tag + return tag + + +def convergence_hasher(k, n, segsize, convergence): + tag = _convergence_hasher_tag(k, n, segsize, convergence) return tagged_hasher(tag, KEYLEN) diff --git a/src/allmydata/windows/fixups.py b/src/allmydata/windows/fixups.py index e98aa8a67..0d1ed2717 100644 --- a/src/allmydata/windows/fixups.py +++ b/src/allmydata/windows/fixups.py @@ -1,29 +1,123 @@ from __future__ import print_function -done = False +# This code isn't loadable or sensible except on Windows. Importers all know +# this and are careful. Normally I would just let an import error from ctypes +# explain any mistakes but Mypy also needs some help here. This assert +# explains to it that this module is Windows-only. This prevents errors about +# ctypes.windll and such which only exist when running on Windows. +# +# Beware of the limitations of the Mypy AST analyzer. The check needs to take +# exactly this form or it may not be recognized. 
+# +# https://mypy.readthedocs.io/en/stable/common_issues.html?highlight=platform#python-version-and-system-platform-checks +import sys +assert sys.platform == "win32" + +import codecs, re +from functools import partial + +from ctypes import WINFUNCTYPE, windll, POINTER, c_int, WinError, byref, get_last_error +from ctypes.wintypes import BOOL, HANDLE, DWORD, LPWSTR, LPCWSTR, LPVOID + +# +from win32api import ( + STD_OUTPUT_HANDLE, + STD_ERROR_HANDLE, + SetErrorMode, + + # + # HANDLE WINAPI GetStdHandle(DWORD nStdHandle); + # returns INVALID_HANDLE_VALUE, NULL, or a valid handle + GetStdHandle, +) +from win32con import ( + SEM_FAILCRITICALERRORS, + SEM_NOOPENFILEERRORBOX, +) + +from win32file import ( + INVALID_HANDLE_VALUE, + FILE_TYPE_CHAR, + + # + # DWORD WINAPI GetFileType(DWORD hFile); + GetFileType, +) + +from allmydata.util import ( + log, +) + +# Keep track of whether `initialize` has run so we don't do any of the +# initialization more than once. +_done = False + +# +# pywin32 for Python 2.7 does not bind any of these *W variants so we do it +# ourselves. +# + +# +# BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, +# LPDWORD lpCharsWritten, LPVOID lpReserved); +WriteConsoleW = WINFUNCTYPE( + BOOL, HANDLE, LPWSTR, DWORD, POINTER(DWORD), LPVOID, + use_last_error=True +)(("WriteConsoleW", windll.kernel32)) + +# +GetCommandLineW = WINFUNCTYPE( + LPWSTR, + use_last_error=True +)(("GetCommandLineW", windll.kernel32)) + +# +CommandLineToArgvW = WINFUNCTYPE( + POINTER(LPWSTR), LPCWSTR, POINTER(c_int), + use_last_error=True +)(("CommandLineToArgvW", windll.shell32)) + +# +# BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); +GetConsoleMode = WINFUNCTYPE( + BOOL, HANDLE, POINTER(DWORD), + use_last_error=True +)(("GetConsoleMode", windll.kernel32)) + + +STDOUT_FILENO = 1 +STDERR_FILENO = 2 + +def get_argv(): + """ + :return [unicode]: The argument list this process was invoked with, as + unicode. 
+ + Python 2 does not do a good job exposing this information in + ``sys.argv`` on Windows so this code re-retrieves the underlying + information using Windows API calls and massages it into the right + shape. + """ + command_line = GetCommandLineW() + argc = c_int(0) + argv_unicode = CommandLineToArgvW(command_line, byref(argc)) + if not argv_unicode:  # failure is a NULL pointer, which ctypes reports as falsy, never as None + raise WinError(get_last_error()) + + # Convert it to a normal Python list + return list( + argv_unicode[i] + for i + in range(argc.value) + ) + def initialize(): - global done + global _done import sys - if sys.platform != "win32" or done: + if sys.platform != "win32" or _done: return True - done = True - - import codecs, re - from ctypes import WINFUNCTYPE, WinError, windll, POINTER, byref, c_int, get_last_error - from ctypes.wintypes import BOOL, HANDLE, DWORD, UINT, LPWSTR, LPCWSTR, LPVOID - - from allmydata.util import log - from allmydata.util.encodingutil import canonical_encoding - - # - SetErrorMode = WINFUNCTYPE( - UINT, UINT, - use_last_error=True - )(("SetErrorMode", windll.kernel32)) - - SEM_FAILCRITICALERRORS = 0x0001 - SEM_NOOPENFILEERRORBOX = 0x8000 + _done = True SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX) @@ -33,10 +127,12 @@ def initialize(): # which makes for frustrating debugging if stderr is directed to our wrapper. # So be paranoid about catching errors and reporting them to original_stderr, # so that we can at least see them. - def _complain(message): - print(isinstance(message, str) and message or repr(message), file=original_stderr) + def _complain(output_file, message): + print(isinstance(message, str) and message or repr(message), file=output_file) log.msg(message, level=log.WEIRD) + _complain = partial(_complain, original_stderr) + # Work around . codecs.register(lambda name: name == 'cp65001' and codecs.lookup('utf-8') or None) @@ -46,45 +142,6 @@ def initialize(): # and TZOmegaTZIOY # . 
try: - # - # HANDLE WINAPI GetStdHandle(DWORD nStdHandle); - # returns INVALID_HANDLE_VALUE, NULL, or a valid handle - # - # - # DWORD WINAPI GetFileType(DWORD hFile); - # - # - # BOOL WINAPI GetConsoleMode(HANDLE hConsole, LPDWORD lpMode); - - GetStdHandle = WINFUNCTYPE( - HANDLE, DWORD, - use_last_error=True - )(("GetStdHandle", windll.kernel32)) - - STD_OUTPUT_HANDLE = DWORD(-11) - STD_ERROR_HANDLE = DWORD(-12) - - GetFileType = WINFUNCTYPE( - DWORD, DWORD, - use_last_error=True - )(("GetFileType", windll.kernel32)) - - FILE_TYPE_CHAR = 0x0002 - FILE_TYPE_REMOTE = 0x8000 - - GetConsoleMode = WINFUNCTYPE( - BOOL, HANDLE, POINTER(DWORD), - use_last_error=True - )(("GetConsoleMode", windll.kernel32)) - - INVALID_HANDLE_VALUE = DWORD(-1).value - - def not_a_console(handle): - if handle == INVALID_HANDLE_VALUE or handle is None: - return True - return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR - or GetConsoleMode(handle, byref(DWORD())) == 0) - old_stdout_fileno = None old_stderr_fileno = None if hasattr(sys.stdout, 'fileno'): @@ -92,126 +149,33 @@ def initialize(): if hasattr(sys.stderr, 'fileno'): old_stderr_fileno = sys.stderr.fileno() - STDOUT_FILENO = 1 - STDERR_FILENO = 2 real_stdout = (old_stdout_fileno == STDOUT_FILENO) real_stderr = (old_stderr_fileno == STDERR_FILENO) if real_stdout: hStdout = GetStdHandle(STD_OUTPUT_HANDLE) - if not_a_console(hStdout): + if not a_console(hStdout): real_stdout = False if real_stderr: hStderr = GetStdHandle(STD_ERROR_HANDLE) - if not_a_console(hStderr): + if not a_console(hStderr): real_stderr = False - if real_stdout or real_stderr: - # - # BOOL WINAPI WriteConsoleW(HANDLE hOutput, LPWSTR lpBuffer, DWORD nChars, - # LPDWORD lpCharsWritten, LPVOID lpReserved); + if real_stdout: + sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '', _complain) + else: + sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '', _complain) - WriteConsoleW = WINFUNCTYPE( - BOOL, HANDLE, LPWSTR, DWORD, 
POINTER(DWORD), LPVOID, - use_last_error=True - )(("WriteConsoleW", windll.kernel32)) - - class UnicodeOutput(object): - def __init__(self, hConsole, stream, fileno, name): - self._hConsole = hConsole - self._stream = stream - self._fileno = fileno - self.closed = False - self.softspace = False - self.mode = 'w' - self.encoding = 'utf-8' - self.name = name - if hasattr(stream, 'encoding') and canonical_encoding(stream.encoding) != 'utf-8': - log.msg("%s: %r had encoding %r, but we're going to write UTF-8 to it" % - (name, stream, stream.encoding), level=log.CURIOUS) - self.flush() - - def isatty(self): - return False - def close(self): - # don't really close the handle, that would only cause problems - self.closed = True - def fileno(self): - return self._fileno - def flush(self): - if self._hConsole is None: - try: - self._stream.flush() - except Exception as e: - _complain("%s.flush: %r from %r" % (self.name, e, self._stream)) - raise - - def write(self, text): - try: - if self._hConsole is None: - if isinstance(text, unicode): - text = text.encode('utf-8') - self._stream.write(text) - else: - if not isinstance(text, unicode): - text = str(text).decode('utf-8') - remaining = len(text) - while remaining > 0: - n = DWORD(0) - # There is a shorter-than-documented limitation on the length of the string - # passed to WriteConsoleW (see #1232). 
- retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None) - if retval == 0: - raise IOError("WriteConsoleW failed with WinError: %s" % (WinError(get_last_error()),)) - if n.value == 0: - raise IOError("WriteConsoleW returned %r, n.value = 0" % (retval,)) - remaining -= n.value - if remaining == 0: break - text = text[n.value:] - except Exception as e: - _complain("%s.write: %r" % (self.name, e)) - raise - - def writelines(self, lines): - try: - for line in lines: - self.write(line) - except Exception as e: - _complain("%s.writelines: %r" % (self.name, e)) - raise - - if real_stdout: - sys.stdout = UnicodeOutput(hStdout, None, STDOUT_FILENO, '') - else: - sys.stdout = UnicodeOutput(None, sys.stdout, old_stdout_fileno, '') - - if real_stderr: - sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '') - else: - sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '') + if real_stderr: + sys.stderr = UnicodeOutput(hStderr, None, STDERR_FILENO, '', _complain) + else: + sys.stderr = UnicodeOutput(None, sys.stderr, old_stderr_fileno, '', _complain) except Exception as e: _complain("exception %r while fixing up sys.stdout and sys.stderr" % (e,)) # This works around . - # - GetCommandLineW = WINFUNCTYPE( - LPWSTR, - use_last_error=True - )(("GetCommandLineW", windll.kernel32)) - - # - CommandLineToArgvW = WINFUNCTYPE( - POINTER(LPWSTR), LPCWSTR, POINTER(c_int), - use_last_error=True - )(("CommandLineToArgvW", windll.shell32)) - - argc = c_int(0) - argv_unicode = CommandLineToArgvW(GetCommandLineW(), byref(argc)) - if argv_unicode is None: - raise WinError(get_last_error()) - # Because of (and similar limitations in # twisted), the 'bin/tahoe' script cannot invoke us with the actual Unicode arguments. 
# Instead it "mangles" or escapes them using \x7F as an escape character, which we @@ -224,11 +188,12 @@ def initialize(): s, ) + argv_unicode = get_argv() try: - argv = [unmangle(argv_unicode[i]).encode('utf-8') for i in xrange(0, argc.value)] + argv = [unmangle(argv_u).encode('utf-8') for argv_u in argv_unicode] except Exception as e: _complain("%s: could not unmangle Unicode arguments.\n%r" - % (sys.argv[0], [argv_unicode[i] for i in xrange(0, argc.value)])) + % (sys.argv[0], argv_unicode)) raise # Take only the suffix with the same number of arguments as sys.argv. @@ -240,3 +205,112 @@ def initialize(): sys.argv = argv[-len(sys.argv):] if sys.argv[0].endswith('.pyscript'): sys.argv[0] = sys.argv[0][:-9] + + +def a_console(handle): + """ + :return: ``True`` if ``handle`` refers to a console, ``False`` otherwise. + """ + if handle == INVALID_HANDLE_VALUE: + return False + return ( + # It's a character file (eg a printer or a console) + GetFileType(handle) == FILE_TYPE_CHAR and + # Checking the console mode doesn't fail (thus it's a console) + GetConsoleMode(handle, byref(DWORD())) != 0 + ) + + +class UnicodeOutput(object): + """ + ``UnicodeOutput`` is a file-like object that encodes unicode to UTF-8 and + writes it to another file or writes unicode natively to the Windows + console. + """ + def __init__(self, hConsole, stream, fileno, name, _complain): + """ + :param hConsole: ``None`` or a handle on the console to which to write + unicode. Mutually exclusive with ``stream``. + + :param stream: ``None`` or a file-like object to which to write bytes. + + :param fileno: A result to hand back from method of the same name. + + :param name: A human-friendly identifier for this output object. + + :param _complain: A one-argument callable which accepts bytes to be + written when there's a problem. Care should be taken to not make + this do a write on this object. 
+ """ + self._hConsole = hConsole + self._stream = stream + self._fileno = fileno + self.closed = False + self.softspace = False + self.mode = 'w' + self.encoding = 'utf-8' + self.name = name + + self._complain = _complain + + from allmydata.util.encodingutil import canonical_encoding + from allmydata.util import log + if hasattr(stream, 'encoding') and canonical_encoding(stream.encoding) != 'utf-8': + log.msg("%s: %r had encoding %r, but we're going to write UTF-8 to it" % + (name, stream, stream.encoding), level=log.CURIOUS) + self.flush() + + def isatty(self): + return False + def close(self): + # don't really close the handle, that would only cause problems + self.closed = True + def fileno(self): + return self._fileno + def flush(self): + if self._hConsole is None: + try: + self._stream.flush() + except Exception as e: + self._complain("%s.flush: %r from %r" % (self.name, e, self._stream)) + raise + + def write(self, text): + try: + if self._hConsole is None: + # There is no Windows console available. That means we are + # responsible for encoding the unicode to a byte string to + # write it to a Python file object. + if isinstance(text, unicode): + text = text.encode('utf-8') + self._stream.write(text) + else: + # There is a Windows console available. That means Windows is + # responsible for dealing with the unicode itself. + if not isinstance(text, unicode): + text = str(text).decode('utf-8') + remaining = len(text) + while remaining > 0: + n = DWORD(0) + # There is a shorter-than-documented limitation on the + # length of the string passed to WriteConsoleW (see + # #1232). 
+ retval = WriteConsoleW(self._hConsole, text, min(remaining, 10000), byref(n), None) + if retval == 0: + raise IOError("WriteConsoleW failed with WinError: %s" % (WinError(get_last_error()),)) + if n.value == 0: + raise IOError("WriteConsoleW returned %r, n.value = 0" % (retval,)) + remaining -= n.value + if remaining == 0: break + text = text[n.value:] + except Exception as e: + self._complain("%s.write: %r" % (self.name, e)) + raise + + def writelines(self, lines): + try: + for line in lines: + self.write(line) + except Exception as e: + self._complain("%s.writelines: %r" % (self.name, e)) + raise