diff --git a/docs/configuration.rst b/docs/configuration.rst index 540107cbb..2c0746ba2 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -75,7 +75,7 @@ The item descriptions below use the following types: Node Types ========== -A node can be a client/server, an introducer, or a statistics gatherer. +A node can be a client/server or an introducer. Client/server nodes provide one or more of the following services: @@ -593,11 +593,6 @@ Client Configuration If provided, the node will attempt to connect to and use the given helper for uploads. See :doc:`helper` for details. -``stats_gatherer.furl = (FURL string, optional)`` - - If provided, the node will connect to the given stats gatherer and - provide it with operational statistics. - ``shares.needed = (int, optional) aka "k", default 3`` ``shares.total = (int, optional) aka "N", N >= k, default 10`` diff --git a/docs/stats.rst b/docs/stats.rst index 200523d07..50642d816 100644 --- a/docs/stats.rst +++ b/docs/stats.rst @@ -242,19 +242,6 @@ The currently available stats (as of release 1.6.0 or so) are described here: the process was started. Ticket #472 indicates that .total may sometimes be negative due to wraparound of the kernel's counter. -**stats.load_monitor.\*** - - When enabled, the "load monitor" continually schedules a one-second - callback, and measures how late the response is. This estimates system load - (if the system is idle, the response should be on time). This is only - enabled if a stats-gatherer is configured. - - avg_load - average "load" value (seconds late) over the last minute - - max_load - maximum "load" value over the last minute - Using Munin To Graph Stats Values ================================= diff --git a/newsfragments/3522.minor b/newsfragments/3522.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3546.minor b/newsfragments/3546.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3549.removed b/newsfragments/3549.removed index bf152cfb0..53c7a7de1 100644 --- a/newsfragments/3549.removed +++ b/newsfragments/3549.removed @@ -1 +1 @@ -The stats gatherer has been removed. The ``[client]stats_gatherer.furl`` configuration item in ``tahoe.cfg`` is no longer allowed. +The stats gatherer, broken since at least Tahoe-LAFS 1.13.0, has been removed. The ``[client]stats_gatherer.furl`` configuration item in ``tahoe.cfg`` is no longer allowed. The Tahoe-LAFS project recommends using a third-party metrics aggregation tool instead. diff --git a/newsfragments/3551.minor b/newsfragments/3551.minor new file mode 100644 index 000000000..e69de29bb diff --git a/src/allmydata/immutable/checker.py b/src/allmydata/immutable/checker.py index 2bed90e1c..9636b9a2f 100644 --- a/src/allmydata/immutable/checker.py +++ b/src/allmydata/immutable/checker.py @@ -1,3 +1,15 @@ +""" +Ported to Python 3. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + from zope.interface import implementer from twisted.internet import defer from foolscap.api import DeadReferenceError, RemoteException diff --git a/src/allmydata/immutable/repairer.py b/src/allmydata/immutable/repairer.py index 1d3782d10..bccd8453d 100644 --- a/src/allmydata/immutable/repairer.py +++ b/src/allmydata/immutable/repairer.py @@ -1,3 +1,15 @@ +""" +Ported to Python 3. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + from zope.interface import implementer from twisted.internet import defer from allmydata.storage.server import si_b2a diff --git a/src/allmydata/introducer/client.py b/src/allmydata/introducer/client.py index f54595221..fa1e1efe8 100644 --- a/src/allmydata/introducer/client.py +++ b/src/allmydata/introducer/client.py @@ -1,4 +1,16 @@ -from past.builtins import unicode, long +""" +Ported to Python 3. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 +from past.builtins import long + from six import ensure_text import time @@ -27,11 +39,11 @@ class IntroducerClient(service.Service, Referenceable): nickname, my_version, oldest_supported, sequencer, cache_filepath): self._tub = tub - if isinstance(introducer_furl, unicode): + if isinstance(introducer_furl, str): introducer_furl = introducer_furl.encode("utf-8") self.introducer_furl = introducer_furl - assert type(nickname) is unicode + assert isinstance(nickname, str) self._nickname = nickname self._my_version = my_version self._oldest_supported = oldest_supported @@ -114,7 +126,7 @@ class IntroducerClient(service.Service, Referenceable): def _save_announcements(self): announcements = [] - for _, value in self._inbound_announcements.items(): + for value in self._inbound_announcements.values(): ann, key_s, time_stamp = value # On Python 2, bytes strings are encoded into YAML Unicode strings. # On Python 3, bytes are encoded as YAML bytes. To minimize @@ -125,7 +137,7 @@ class IntroducerClient(service.Service, Referenceable): } announcements.append(server_params) announcement_cache_yaml = yamlutil.safe_dump(announcements) - if isinstance(announcement_cache_yaml, unicode): + if isinstance(announcement_cache_yaml, str): announcement_cache_yaml = announcement_cache_yaml.encode("utf-8") self._cache_filepath.setContent(announcement_cache_yaml) @@ -170,7 +182,7 @@ class IntroducerClient(service.Service, Referenceable): self._local_subscribers.append( (service_name,cb,args,kwargs) ) self._subscribed_service_names.add(service_name) self._maybe_subscribe() - for index,(ann,key_s,when) in self._inbound_announcements.items(): + for index,(ann,key_s,when) in list(self._inbound_announcements.items()): precondition(isinstance(key_s, bytes), key_s) servicename = index[0] if servicename == service_name: @@ -215,7 +227,7 @@ class IntroducerClient(service.Service, Referenceable): self._outbound_announcements[service_name] = ann_d # publish all announcements with the new seqnum and nonce - for service_name,ann_d in self._outbound_announcements.items(): + for service_name,ann_d in list(self._outbound_announcements.items()): ann_d["seqnum"] = current_seqnum ann_d["nonce"] = current_nonce ann_t = sign_to_foolscap(ann_d, signing_key) @@ -227,7 +239,7 @@ class IntroducerClient(service.Service, Referenceable): self.log("want to publish, but no introducer yet", level=log.NOISY) return # this re-publishes everything. The Introducer ignores duplicates - for ann_t in self._published_announcements.values(): + for ann_t in list(self._published_announcements.values()): self._debug_counts["outbound_message"] += 1 self._debug_outstanding += 1 d = self._publisher.callRemote("publish_v2", ann_t, self._canary) @@ -267,7 +279,7 @@ class IntroducerClient(service.Service, Referenceable): return # for ASCII values, simplejson might give us unicode *or* bytes if "nickname" in ann and isinstance(ann["nickname"], bytes): - ann["nickname"] = unicode(ann["nickname"]) + ann["nickname"] = str(ann["nickname"]) nick_s = ann.get("nickname",u"").encode("utf-8") lp2 = self.log(format="announcement for nickname '%(nick)s', service=%(svc)s: %(ann)s", nick=nick_s, svc=service_name, ann=ann, umid="BoKEag") diff --git a/src/allmydata/introducer/common.py b/src/allmydata/introducer/common.py index 8ccb3c2e5..f67aad203 100644 --- a/src/allmydata/introducer/common.py +++ b/src/allmydata/introducer/common.py @@ -1,3 +1,15 @@ +""" +Ported to Python 3. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 + import re from allmydata.crypto.util import remove_prefix from allmydata.crypto import ed25519 diff --git a/src/allmydata/introducer/server.py b/src/allmydata/introducer/server.py index e41bff14b..cd3d4a68a 100644 --- a/src/allmydata/introducer/server.py +++ b/src/allmydata/introducer/server.py @@ -1,5 +1,18 @@ +""" +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from past.builtins import long -from six import ensure_str, ensure_text +from six import ensure_text import time, os.path, textwrap from zope.interface import implementer @@ -157,7 +170,7 @@ class IntroducerService(service.MultiService, Referenceable): # 'subscriber_info' is a dict, provided directly by v2 clients. The # expected keys are: version, nickname, app-versions, my-version, # oldest-supported - self._subscribers = {} + self._subscribers = dictutil.UnicodeKeyDict({}) self._debug_counts = {"inbound_message": 0, "inbound_duplicate": 0, @@ -181,7 +194,7 @@ class IntroducerService(service.MultiService, Referenceable): def get_announcements(self): """Return a list of AnnouncementDescriptor for all announcements""" announcements = [] - for (index, (_, canary, ann, when)) in self._announcements.items(): + for (index, (_, canary, ann, when)) in list(self._announcements.items()): ad = AnnouncementDescriptor(when, index, canary, ann) announcements.append(ad) return announcements @@ -189,8 +202,8 @@ class IntroducerService(service.MultiService, Referenceable): def get_subscribers(self): """Return a list of SubscriberDescriptor objects for all subscribers""" s = [] - for service_name, subscriptions in self._subscribers.items(): - for rref,(subscriber_info,when) in subscriptions.items(): + for service_name, subscriptions in list(self._subscribers.items()): + for rref,(subscriber_info,when) in list(subscriptions.items()): # note that if the subscriber didn't do Tub.setLocation, # tubid will be None. Also, subscribers do not tell us which # pubkey they use; only publishers do that. @@ -281,7 +294,7 @@ class IntroducerService(service.MultiService, Referenceable): def remote_subscribe_v2(self, subscriber, service_name, subscriber_info): self.log("introducer: subscription[%s] request at %s" % (service_name, subscriber), umid="U3uzLg") - service_name = ensure_str(service_name) + service_name = ensure_text(service_name) subscriber_info = dictutil.UnicodeKeyDict({ ensure_text(k): v for (k, v) in subscriber_info.items() }) @@ -307,11 +320,11 @@ class IntroducerService(service.MultiService, Referenceable): subscribers.pop(subscriber, None) subscriber.notifyOnDisconnect(_remove) + # Make sure types are correct: + for k in self._announcements: + assert isinstance(k[0], type(service_name)) + # now tell them about any announcements they're interested in - assert {type(service_name)}.issuperset( - set(type(k[0]) for k in self._announcements)), ( - service_name, self._announcements.keys() - ) announcements = set( [ ann_t for idx,(ann_t,canary,ann,when) in self._announcements.items() diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index ddef46db6..6f931556d 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -1,4 +1,5 @@ from __future__ import print_function +from __future__ import unicode_literals import os.path import codecs @@ -10,7 +11,7 @@ from allmydata import uri from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_aliases from allmydata.util.fileutil import move_into_place -from allmydata.util.encodingutil import unicode_to_output, quote_output +from allmydata.util.encodingutil import quote_output, quote_output_u def add_line_to_aliasfile(aliasfile, alias, cap): @@ -48,14 +49,13 @@ def add_alias(options): old_aliases = get_aliases(nodedir) if alias in old_aliases: - print("Alias %s already exists!" % quote_output(alias), file=stderr) + show_output(stderr, "Alias {alias} already exists!", alias=alias) return 1 aliasfile = os.path.join(nodedir, "private", "aliases") cap = uri.from_string_dirnode(cap).to_string() add_line_to_aliasfile(aliasfile, alias, cap) - - print("Alias %s added" % quote_output(alias), file=stdout) + show_output(stdout, "Alias {alias} added", alias=alias) return 0 def create_alias(options): @@ -75,7 +75,7 @@ def create_alias(options): old_aliases = get_aliases(nodedir) if alias in old_aliases: - print("Alias %s already exists!" % quote_output(alias), file=stderr) + show_output(stderr, "Alias {alias} already exists!", alias=alias) return 1 aliasfile = os.path.join(nodedir, "private", "aliases") @@ -93,11 +93,51 @@ def create_alias(options): # probably check for others.. add_line_to_aliasfile(aliasfile, alias, new_uri) - - print("Alias %s created" % (quote_output(alias),), file=stdout) + show_output(stdout, "Alias {alias} created", alias=alias) return 0 +def show_output(fp, template, **kwargs): + """ + Print to just about anything. + + :param fp: A file-like object to which to print. This handles the case + where ``fp`` declares a support encoding with the ``encoding`` + attribute (eg sys.stdout on Python 3). It handles the case where + ``fp`` declares no supported encoding via ``None`` for its + ``encoding`` attribute (eg sys.stdout on Python 2 when stdout is not a + tty). It handles the case where ``fp`` declares an encoding that does + not support all of the characters in the output by forcing the + "namereplace" error handler. It handles the case where there is no + ``encoding`` attribute at all (eg StringIO.StringIO) by writing + utf-8-encoded bytes. + """ + assert isinstance(template, unicode) + + # On Python 3 fp has an encoding attribute under all real usage. On + # Python 2, the encoding attribute is None if stdio is not a tty. The + # test suite often passes StringIO which has no such attribute. Make + # allowances for this until the test suite is fixed and Python 2 is no + # more. + try: + encoding = fp.encoding or "utf-8" + except AttributeError: + has_encoding = False + encoding = "utf-8" + else: + has_encoding = True + + output = template.format(**{ + k: quote_output_u(v, encoding=encoding) + for (k, v) + in kwargs.items() + }) + safe_output = output.encode(encoding, "namereplace") + if has_encoding: + safe_output = safe_output.decode(encoding) + print(safe_output, file=fp) + + def _get_alias_details(nodedir): aliases = get_aliases(nodedir) alias_names = sorted(aliases.keys()) @@ -111,34 +151,45 @@ def _get_alias_details(nodedir): return data +def _escape_format(t): + """ + _escape_format(t).format() == t + + :param unicode t: The text to escape. + """ + return t.replace("{", "{{").replace("}", "}}") + + def list_aliases(options): - nodedir = options['node-directory'] - stdout = options.stdout - stderr = options.stderr - - data = _get_alias_details(nodedir) - - max_width = max([len(quote_output(name)) for name in data.keys()] + [0]) - fmt = "%" + str(max_width) + "s: %s" - rc = 0 + """ + Show aliases that exist. + """ + data = _get_alias_details(options['node-directory']) if options['json']: - try: - # XXX why are we presuming utf-8 output? - print(json.dumps(data, indent=4).decode('utf-8'), file=stdout) - except (UnicodeEncodeError, UnicodeDecodeError): - print(json.dumps(data, indent=4), file=stderr) - rc = 1 + output = _escape_format(json.dumps(data, indent=4).decode("ascii")) else: - for name, details in data.items(): - dircap = details['readonly'] if options['readonly-uri'] else details['readwrite'] - try: - print(fmt % (unicode_to_output(name), unicode_to_output(dircap.decode('utf-8'))), file=stdout) - except (UnicodeEncodeError, UnicodeDecodeError): - print(fmt % (quote_output(name), quote_output(dircap)), file=stderr) - rc = 1 + def dircap(details): + return ( + details['readonly'] + if options['readonly-uri'] + else details['readwrite'] + ).decode("utf-8") - if rc == 1: - print("\nThis listing included aliases or caps that could not be converted to the terminal" \ - "\noutput encoding. These are shown using backslash escapes and in quotes.", file=stderr) - return rc + def format_dircap(name, details): + return fmt % (name, dircap(details)) + + max_width = max([len(quote_output(name)) for name in data.keys()] + [0]) + fmt = "%" + str(max_width) + "s: %s" + output = "\n".join(list( + format_dircap(name, details) + for name, details + in data.items() + )) + + if output: + # Show whatever we computed. Skip this if there is no output to avoid + # a spurious blank line. + show_output(options.stdout, output) + + return 0 diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 535d25bdb..294a2d215 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -561,6 +561,9 @@ class _FoolscapStorage(object): } *nickname* is optional. + + The furl will be a Unicode string on Python 3; on Python 2 it will be + either a native (bytes) string or a Unicode string. """ furl = furl.encode("utf-8") m = re.match(br'pb://(\w+)@', furl) diff --git a/src/allmydata/test/cli/common.py b/src/allmydata/test/cli/common.py index 852dce52c..bf175de44 100644 --- a/src/allmydata/test/cli/common.py +++ b/src/allmydata/test/cli/common.py @@ -1,6 +1,6 @@ from ...util.encodingutil import unicode_to_argv from ...scripts import runner -from ..common_util import ReallyEqualMixin, run_cli +from ..common_util import ReallyEqualMixin, run_cli, run_cli_unicode def parse_options(basedir, command, args): o = runner.Options() @@ -10,10 +10,41 @@ def parse_options(basedir, command, args): return o class CLITestMixin(ReallyEqualMixin): - def do_cli(self, verb, *args, **kwargs): + """ + A mixin for use with ``GridTestMixin`` to execute CLI commands against + nodes created by methods of that mixin. + """ + def do_cli_unicode(self, verb, argv, client_num=0, **kwargs): + """ + Run a Tahoe-LAFS CLI command. + + :param verb: See ``run_cli_unicode``. + + :param argv: See ``run_cli_unicode``. + + :param int client_num: The number of the ``GridTestMixin``-created + node against which to execute the command. + + :param kwargs: Additional keyword arguments to pass to + ``run_cli_unicode``. + """ # client_num is used to execute client CLI commands on a specific # client. - client_num = kwargs.get("client_num", 0) + client_dir = self.get_clientdir(i=client_num) + nodeargs = [ u"--node-directory", client_dir ] + return run_cli_unicode(verb, argv, nodeargs=nodeargs, **kwargs) + + + def do_cli(self, verb, *args, **kwargs): + """ + Like ``do_cli_unicode`` but work with ``bytes`` everywhere instead of + ``unicode``. + + Where possible, prefer ``do_cli_unicode``. + """ + # client_num is used to execute client CLI commands on a specific + # client. + client_num = kwargs.pop("client_num", 0) client_dir = unicode_to_argv(self.get_clientdir(i=client_num)) - nodeargs = [ "--node-directory", client_dir ] - return run_cli(verb, nodeargs=nodeargs, *args, **kwargs) + nodeargs = [ b"--node-directory", client_dir ] + return run_cli(verb, *args, nodeargs=nodeargs, **kwargs) diff --git a/src/allmydata/test/cli/test_alias.py b/src/allmydata/test/cli/test_alias.py index 6542d154f..72b634608 100644 --- a/src/allmydata/test/cli/test_alias.py +++ b/src/allmydata/test/cli/test_alias.py @@ -1,105 +1,126 @@ import json -from mock import patch from twisted.trial import unittest from twisted.internet.defer import inlineCallbacks -from allmydata.util.encodingutil import unicode_to_argv from allmydata.scripts.common import get_aliases from allmydata.test.no_network import GridTestMixin from .common import CLITestMixin -from ..common_util import skip_if_cannot_represent_argv +from allmydata.util import encodingutil # see also test_create_alias class ListAlias(GridTestMixin, CLITestMixin, unittest.TestCase): @inlineCallbacks - def test_list(self): - self.basedir = "cli/ListAlias/test_list" + def _check_create_alias(self, alias, encoding): + """ + Verify that ``tahoe create-alias`` can be used to create an alias named + ``alias`` when argv is encoded using ``encoding``. + + :param unicode alias: The alias to try to create. + + :param NoneType|str encoding: The name of an encoding to force the + ``create-alias`` implementation to use. This simulates the + effects of setting LANG and doing other locale-foolishness without + actually having to mess with this process's global locale state. + If this is ``None`` then the encoding used will be ascii but the + stdio objects given to the code under test will not declare any + encoding (this is like Python 2 when stdio is not a tty). + + :return Deferred: A Deferred that fires with success if the alias can + be created and that creation is reported on stdout appropriately + encoded or with failure if something goes wrong. + """ + self.basedir = self.mktemp() self.set_up_grid(oneshare=True) - rc, stdout, stderr = yield self.do_cli( - "create-alias", - unicode_to_argv(u"tahoe"), + # We can pass an encoding into the test utilities to invoke the code + # under test but we can't pass such a parameter directly to the code + # under test. Instead, that code looks at io_encoding. So, + # monkey-patch that value to our desired value here. This is the code + # that most directly takes the place of messing with LANG or the + # locale module. + self.patch(encodingutil, "io_encoding", encoding or "ascii") + + rc, stdout, stderr = yield self.do_cli_unicode( + u"create-alias", + [alias], + encoding=encoding, ) - self.failUnless(unicode_to_argv(u"Alias 'tahoe' created") in stdout) - self.failIf(stderr) - aliases = get_aliases(self.get_clientdir()) - self.failUnless(u"tahoe" in aliases) - self.failUnless(aliases[u"tahoe"].startswith("URI:DIR2:")) + # Make sure the result of the create-alias command is as we want it to + # be. + self.assertEqual(u"Alias '{}' created\n".format(alias), stdout) + self.assertEqual("", stderr) + self.assertEqual(0, rc) - rc, stdout, stderr = yield self.do_cli("list-aliases", "--json") + # Make sure it had the intended side-effect, too - an alias created in + # the node filesystem state. + aliases = get_aliases(self.get_clientdir()) + self.assertIn(alias, aliases) + self.assertTrue(aliases[alias].startswith(u"URI:DIR2:")) + + # And inspect the state via the user interface list-aliases command + # too. + rc, stdout, stderr = yield self.do_cli_unicode( + u"list-aliases", + [u"--json"], + encoding=encoding, + ) self.assertEqual(0, rc) data = json.loads(stdout) - self.assertIn(u"tahoe", data) - data = data[u"tahoe"] - self.assertIn("readwrite", data) - self.assertIn("readonly", data) + self.assertIn(alias, data) + data = data[alias] + self.assertIn(u"readwrite", data) + self.assertIn(u"readonly", data) - @inlineCallbacks - def test_list_unicode_mismatch_json(self): - """ - pretty hack-y test, but we want to cover the 'except' on Unicode - errors paths and I can't come up with a nicer way to trigger - this - """ - self.basedir = "cli/ListAlias/test_list_unicode_mismatch_json" - skip_if_cannot_represent_argv(u"tahoe\u263A") - self.set_up_grid(oneshare=True) - rc, stdout, stderr = yield self.do_cli( - "create-alias", - unicode_to_argv(u"tahoe\u263A"), + def test_list_none(self): + """ + An alias composed of all ASCII-encodeable code points can be created when + stdio aren't clearly marked with an encoding. + """ + return self._check_create_alias( + u"tahoe", + encoding=None, ) - self.failUnless(unicode_to_argv(u"Alias 'tahoe\u263A' created") in stdout) - self.failIf(stderr) - booms = [] - - def boom(out, indent=4): - if not len(booms): - booms.append(out) - raise UnicodeEncodeError("foo", u"foo", 3, 5, "foo") - return str(out) - - with patch("allmydata.scripts.tahoe_add_alias.json.dumps", boom): - aliases = get_aliases(self.get_clientdir()) - self.failUnless(u"tahoe\u263A" in aliases) - self.failUnless(aliases[u"tahoe\u263A"].startswith("URI:DIR2:")) - - rc, stdout, stderr = yield self.do_cli("list-aliases", "--json") - - self.assertEqual(1, rc) - self.assertIn("could not be converted", stderr) - - @inlineCallbacks - def test_list_unicode_mismatch(self): - self.basedir = "cli/ListAlias/test_list_unicode_mismatch" - skip_if_cannot_represent_argv(u"tahoe\u263A") - self.set_up_grid(oneshare=True) - - rc, stdout, stderr = yield self.do_cli( - "create-alias", - unicode_to_argv(u"tahoe\u263A"), + def test_list_ascii(self): + """ + An alias composed of all ASCII-encodeable code points can be created when + the active encoding is ASCII. + """ + return self._check_create_alias( + u"tahoe", + encoding="ascii", ) - def boom(out): - print("boom {}".format(out)) - return out - raise UnicodeEncodeError("foo", u"foo", 3, 5, "foo") - with patch("allmydata.scripts.tahoe_add_alias.unicode_to_output", boom): - self.failUnless(unicode_to_argv(u"Alias 'tahoe\u263A' created") in stdout) - self.failIf(stderr) - aliases = get_aliases(self.get_clientdir()) - self.failUnless(u"tahoe\u263A" in aliases) - self.failUnless(aliases[u"tahoe\u263A"].startswith("URI:DIR2:")) + def test_list_latin_1(self): + """ + An alias composed of all Latin-1-encodeable code points can be created + when the active encoding is Latin-1. - rc, stdout, stderr = yield self.do_cli("list-aliases") + This is very similar to ``test_list_utf_8`` but the assumption of + UTF-8 is nearly ubiquitous and explicitly exercising the codepaths + with a UTF-8-incompatible encoding helps flush out unintentional UTF-8 + assumptions. + """ + return self._check_create_alias( + u"taho\N{LATIN SMALL LETTER E WITH ACUTE}", + encoding="latin-1", + ) - self.assertEqual(1, rc) - self.assertIn("could not be converted", stderr) + + def test_list_utf_8(self): + """ + An alias composed of all UTF-8-encodeable code points can be created when + the active encoding is UTF-8. + """ + return self._check_create_alias( + u"tahoe\N{SNOWMAN}", + encoding="utf-8", + ) diff --git a/src/allmydata/test/cli/test_cp.py b/src/allmydata/test/cli/test_cp.py index ba1894f1c..6cebec4a5 100644 --- a/src/allmydata/test/cli/test_cp.py +++ b/src/allmydata/test/cli/test_cp.py @@ -661,7 +661,7 @@ starting copy, 2 files, 1 directories # This test ensures that tahoe will copy a file from the grid to # a local directory without a specified file name. # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/2027 - self.basedir = "cli/Cp/cp_verbose" + self.basedir = "cli/Cp/ticket_2027" self.set_up_grid(oneshare=True) # Write a test file, which we'll copy to the grid. diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py index f93272540..48415eabb 100644 --- a/src/allmydata/test/common.py +++ b/src/allmydata/test/common.py @@ -11,6 +11,8 @@ __all__ = [ "skipIf", ] +from past.builtins import chr as byteschr + import os, random, struct import six import tempfile @@ -1057,7 +1059,7 @@ def _corrupt_share_data_last_byte(data, debug=False): sharedatasize = struct.unpack(">Q", data[0x0c+0x08:0x0c+0x0c+8])[0] offset = 0x0c+0x44+sharedatasize-1 - newdata = data[:offset] + chr(ord(data[offset])^0xFF) + data[offset+1:] + newdata = data[:offset] + byteschr(ord(data[offset:offset+1])^0xFF) + data[offset+1:] if debug: log.msg("testing: flipping all bits of byte at offset %d: %r, newdata: %r" % (offset, data[offset], newdata[offset])) return newdata @@ -1085,7 +1087,7 @@ def _corrupt_crypttext_hash_tree_byte_x221(data, debug=False): assert sharevernum in (1, 2), "This test is designed to corrupt immutable shares of v1 or v2 in specific ways." if debug: log.msg("original data: %r" % (data,)) - return data[:0x0c+0x221] + chr(ord(data[0x0c+0x221])^0x02) + data[0x0c+0x2210+1:] + return data[:0x0c+0x221] + byteschr(ord(data[0x0c+0x221:0x0c+0x221+1])^0x02) + data[0x0c+0x2210+1:] def _corrupt_block_hashes(data, debug=False): """Scramble the file data -- the field containing the block hash tree diff --git a/src/allmydata/test/common_util.py b/src/allmydata/test/common_util.py index e3f5cf750..341d383c1 100644 --- a/src/allmydata/test/common_util.py +++ b/src/allmydata/test/common_util.py @@ -5,6 +5,10 @@ import time import signal from random import randrange from six.moves import StringIO +from io import ( + TextIOWrapper, + BytesIO, +) from twisted.internet import reactor, defer from twisted.python import failure @@ -35,27 +39,131 @@ def skip_if_cannot_represent_argv(u): except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII argv could not be encoded on this platform.") -def run_cli(verb, *args, **kwargs): - precondition(not [True for arg in args if not isinstance(arg, str)], - "arguments to do_cli must be strs -- convert using unicode_to_argv", args=args) - nodeargs = kwargs.get("nodeargs", []) + +def _getvalue(io): + """ + Read out the complete contents of a file-like object. + """ + io.seek(0) + return io.read() + + +def run_cli_bytes(verb, *args, **kwargs): + """ + Run a Tahoe-LAFS CLI command specified as bytes. + + Most code should prefer ``run_cli_unicode`` which deals with all the + necessary encoding considerations. This helper still exists so that novel + misconfigurations can be explicitly tested (for example, receiving UTF-8 + bytes when the system encoding claims to be ASCII). + + :param bytes verb: The command to run. For example, ``b"create-node"``. + + :param [bytes] args: The arguments to pass to the command. For example, + ``(b"--hostname=localhost",)``. + + :param [bytes] nodeargs: Extra arguments to pass to the Tahoe executable + before ``verb``. + + :param bytes stdin: Text to pass to the command via stdin. + + :param NoneType|str encoding: The name of an encoding which stdout and + stderr will be configured to use. ``None`` means stdout and stderr + will accept bytes and unicode and use the default system encoding for + translating between them. + """ + nodeargs = kwargs.pop("nodeargs", []) + encoding = kwargs.pop("encoding", None) + precondition( + all(isinstance(arg, bytes) for arg in [verb] + nodeargs + list(args)), + "arguments to run_cli must be bytes -- convert using unicode_to_argv", + verb=verb, + args=args, + nodeargs=nodeargs, + ) argv = nodeargs + [verb] + list(args) stdin = kwargs.get("stdin", "") - stdout = StringIO() - stderr = StringIO() + if encoding is None: + # The original behavior, the Python 2 behavior, is to accept either + # bytes or unicode and try to automatically encode or decode as + # necessary. This works okay for ASCII and if LANG is set + # appropriately. These aren't great constraints so we should move + # away from this behavior. + stdout = StringIO() + stderr = StringIO() + else: + # The new behavior, the Python 3 behavior, is to accept unicode and + # encode it using a specific encoding. For older versions of Python + # 3, the encoding is determined from LANG (bad) but for newer Python + # 3, the encoding is always utf-8 (good). Tests can pass in different + # encodings to exercise different behaviors. + stdout = TextIOWrapper(BytesIO(), encoding) + stderr = TextIOWrapper(BytesIO(), encoding) d = defer.succeed(argv) d.addCallback(runner.parse_or_exit_with_explanation, stdout=stdout) d.addCallback(runner.dispatch, stdin=StringIO(stdin), stdout=stdout, stderr=stderr) def _done(rc): - return 0, stdout.getvalue(), stderr.getvalue() + return 0, _getvalue(stdout), _getvalue(stderr) def _err(f): f.trap(SystemExit) - return f.value.code, stdout.getvalue(), stderr.getvalue() + return f.value.code, _getvalue(stdout), _getvalue(stderr) d.addCallbacks(_done, _err) return d + +def run_cli_unicode(verb, argv, nodeargs=None, stdin=None, encoding=None): + """ + Run a Tahoe-LAFS CLI command. + + :param unicode verb: The command to run. For example, ``u"create-node"``. + + :param [unicode] argv: The arguments to pass to the command. For example, + ``[u"--hostname=localhost"]``. + + :param [unicode] nodeargs: Extra arguments to pass to the Tahoe executable + before ``verb``. + + :param unicode stdin: Text to pass to the command via stdin. + + :param NoneType|str encoding: The name of an encoding to use for all + bytes/unicode conversions necessary *and* the encoding to cause stdio + to declare with its ``encoding`` attribute. ``None`` means ASCII will + be used and no declaration will be made at all. + """ + if nodeargs is None: + nodeargs = [] + precondition( + all(isinstance(arg, unicode) for arg in [verb] + nodeargs + argv), + "arguments to run_cli_unicode must be unicode", + verb=verb, + nodeargs=nodeargs, + argv=argv, + ) + codec = encoding or "ascii" + encode = lambda t: None if t is None else t.encode(codec) + d = run_cli_bytes( + encode(verb), + nodeargs=list(encode(arg) for arg in nodeargs), + stdin=encode(stdin), + encoding=encoding, + *list(encode(arg) for arg in argv) + ) + def maybe_decode(result): + code, stdout, stderr = result + if isinstance(stdout, bytes): + stdout = stdout.decode(codec) + if isinstance(stderr, bytes): + stderr = stderr.decode(codec) + return code, stdout, stderr + d.addCallback(maybe_decode) + return d + + +run_cli = run_cli_bytes + + def parse_cli(*argv): # This parses the CLI options (synchronously), and returns the Options # argument, or throws usage.UsageError if something went wrong. diff --git a/src/allmydata/test/test_repairer.py b/src/allmydata/test/test_repairer.py index 4fdffe70e..63a54a505 100644 --- a/src/allmydata/test/test_repairer.py +++ b/src/allmydata/test/test_repairer.py @@ -1,5 +1,15 @@ # -*- coding: utf-8 -*- +""" +Ported to Python 3. +""" from __future__ import print_function +from __future__ import absolute_import +from __future__ import division +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401 from allmydata.test import common from allmydata.monitor import Monitor @@ -62,7 +72,7 @@ class RepairTestMixin(object): c0 = self.g.clients[0] c1 = self.g.clients[1] c0.encoding_params['max_segment_size'] = 12 - d = c0.upload(upload.Data(common.TEST_DATA, convergence="")) + d = c0.upload(upload.Data(common.TEST_DATA, convergence=b"")) def _stash_uri(ur): self.uri = ur.get_uri() self.c0_filenode = c0.create_node_from_uri(ur.get_uri()) @@ -464,7 +474,7 @@ class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, # previously-deleted share #2. d.addCallback(lambda ignored: - self.delete_shares_numbered(self.uri, range(3, 10+1))) + self.delete_shares_numbered(self.uri, list(range(3, 10+1)))) d.addCallback(lambda ignored: download_to_data(self.c1_filenode)) d.addCallback(lambda newdata: self.failUnlessEqual(newdata, common.TEST_DATA)) @@ -476,7 +486,7 @@ class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, self.set_up_grid(num_clients=2) d = self.upload_and_stash() d.addCallback(lambda ignored: - self.delete_shares_numbered(self.uri, range(7))) + self.delete_shares_numbered(self.uri, list(range(7)))) d.addCallback(lambda ignored: self._stash_counts()) d.addCallback(lambda ignored: self.c0_filenode.check_and_repair(Monitor(), @@ -509,7 +519,7 @@ class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, # previously-deleted share #2. d.addCallback(lambda ignored: - self.delete_shares_numbered(self.uri, range(3, 10+1))) + self.delete_shares_numbered(self.uri, list(range(3, 10+1)))) d.addCallback(lambda ignored: download_to_data(self.c1_filenode)) d.addCallback(lambda newdata: self.failUnlessEqual(newdata, common.TEST_DATA)) @@ -527,7 +537,7 @@ class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, # distributing the shares widely enough to satisfy the default # happiness setting. def _delete_some_servers(ignored): - for i in xrange(7): + for i in range(7): self.g.remove_server(self.g.servers_by_number[i].my_nodeid) assert len(self.g.servers_by_number) == 3 @@ -640,7 +650,7 @@ class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, # downloading and has the right contents. This can't work # unless it has already repaired the previously-corrupted share. def _then_delete_7_and_try_a_download(unused=None): - shnums = range(10) + shnums = list(range(10)) shnums.remove(shnum) random.shuffle(shnums) for sharenum in shnums[:7]: @@ -679,10 +689,10 @@ class Repairer(GridTestMixin, unittest.TestCase, RepairTestMixin, self.basedir = "repairer/Repairer/test_tiny_reads" self.set_up_grid() c0 = self.g.clients[0] - DATA = "a"*135 + DATA = b"a"*135 c0.encoding_params['k'] = 22 c0.encoding_params['n'] = 66 - d = c0.upload(upload.Data(DATA, convergence="")) + d = c0.upload(upload.Data(DATA, convergence=b"")) def _then(ur): self.uri = ur.get_uri() self.delete_shares_numbered(self.uri, [0]) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 53b670886..33e55bd3b 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -2563,6 +2563,7 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): def _run_in_subprocess(ignored, verb, *args, **kwargs): stdin = kwargs.get("stdin") + # XXX https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3548 env = kwargs.get("env", os.environ) # Python warnings from the child process don't matter. env["PYTHONWARNINGS"] = "ignore" diff --git a/src/allmydata/util/_python3.py b/src/allmydata/util/_python3.py index 93e488c6e..4d1d4356a 100644 --- a/src/allmydata/util/_python3.py +++ b/src/allmydata/util/_python3.py @@ -35,6 +35,7 @@ PORTED_MODULES = [ "allmydata.crypto.rsa", "allmydata.crypto.util", "allmydata.hashtree", + "allmydata.immutable.checker", "allmydata.immutable.downloader", "allmydata.immutable.downloader.common", "allmydata.immutable.downloader.fetcher", @@ -49,9 +50,13 @@ PORTED_MODULES = [ "allmydata.immutable.layout", "allmydata.immutable.literal", "allmydata.immutable.offloaded", + "allmydata.immutable.repairer", "allmydata.immutable.upload", "allmydata.interfaces", + "allmydata.introducer.client", + "allmydata.introducer.common", "allmydata.introducer.interfaces", + "allmydata.introducer.server", "allmydata.monitor", "allmydata.mutable.checker", "allmydata.mutable.common", @@ -151,6 +156,7 @@ PORTED_TEST_MODULES = [ "allmydata.test.test_observer", "allmydata.test.test_pipeline", "allmydata.test.test_python3", + "allmydata.test.test_repairer", "allmydata.test.test_spans", "allmydata.test.test_statistics", "allmydata.test.test_storage", diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 17a7a2f38..f13dc5b8e 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -252,6 +252,16 @@ ESCAPABLE_UNICODE = re.compile(u'([\uD800-\uDBFF][\uDC00-\uDFFF])|' # valid sur ESCAPABLE_8BIT = re.compile( br'[^ !#\x25-\x5B\x5D-\x5F\x61-\x7E]', re.DOTALL) +def quote_output_u(*args, **kwargs): + """ + Like ``quote_output`` but always return ``unicode``. + """ + result = quote_output(*args, **kwargs) + if isinstance(result, unicode): + return result + return result.decode(kwargs.get("encoding", None) or io_encoding) + + def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None): """ Encode either a Unicode string or a UTF-8-encoded bytestring for representation diff --git a/src/allmydata/web/statistics.xhtml b/src/allmydata/web/statistics.xhtml index 42376079d..2cc7e2b5a 100644 --- a/src/allmydata/web/statistics.xhtml +++ b/src/allmydata/web/statistics.xhtml @@ -12,8 +12,6 @@