From 2cc2cb6a7f655bd62ab8973e86a6c7847fa5c19f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Fri, 14 Aug 2020 13:49:48 -0400 Subject: [PATCH 1/8] Rename to_str() to the more accurate to_bytes(). --- src/allmydata/scripts/backupdb.py | 6 ++-- src/allmydata/scripts/debug.py | 12 ++++---- src/allmydata/scripts/tahoe_backup.py | 4 +-- src/allmydata/scripts/tahoe_cp.py | 34 ++++++++++----------- src/allmydata/scripts/tahoe_ls.py | 6 ++-- src/allmydata/scripts/tahoe_mv.py | 4 +-- src/allmydata/test/cli/test_check.py | 4 +-- src/allmydata/test/cli/test_cp.py | 24 +++++++-------- src/allmydata/test/test_encodingutil.py | 14 ++++----- src/allmydata/test/web/test_grid.py | 30 +++++++++---------- src/allmydata/test/web/test_web.py | 40 ++++++++++++------------- src/allmydata/util/encodingutil.py | 10 +++++-- src/allmydata/web/common.py | 6 ++-- src/allmydata/web/directory.py | 4 +-- 14 files changed, 101 insertions(+), 97 deletions(-) diff --git a/src/allmydata/scripts/backupdb.py b/src/allmydata/scripts/backupdb.py index 234f8524a..d188eec3c 100644 --- a/src/allmydata/scripts/backupdb.py +++ b/src/allmydata/scripts/backupdb.py @@ -6,7 +6,7 @@ from allmydata.util.netstring import netstring from allmydata.util.hashutil import backupdb_dirhash from allmydata.util import base32 from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import to_str +from allmydata.util.encodingutil import to_bytes from allmydata.util.dbutil import get_db, DBError @@ -218,7 +218,7 @@ class BackupDB_v2(object): probability = min(max(probability, 0.0), 1.0) should_check = bool(random.random() < probability) - return FileResult(self, to_str(filecap), should_check, + return FileResult(self, to_bytes(filecap), should_check, path, mtime, ctime, size) def get_or_allocate_fileid_for_cap(self, filecap): @@ -321,7 +321,7 @@ class BackupDB_v2(object): probability = min(max(probability, 0.0), 1.0) should_check = bool(random.random() < probability) - return 
DirectoryResult(self, dirhash_s, to_str(dircap), should_check) + return DirectoryResult(self, dirhash_s, to_bytes(dircap), should_check) def did_create_directory(self, dircap, dirhash): now = time.time() diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index fff58caff..789218f8b 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -63,7 +63,7 @@ def dump_immutable_chk_share(f, out, options): from allmydata import uri from allmydata.util import base32 from allmydata.immutable.layout import ReadBucketProxy - from allmydata.util.encodingutil import quote_output, to_str + from allmydata.util.encodingutil import quote_output, to_bytes # use a ReadBucketProxy to parse the bucket and find the uri extension bp = ReadBucketProxy(None, None, '') @@ -109,7 +109,7 @@ def dump_immutable_chk_share(f, out, options): # knowing the parent directory name to get it pieces = options['filename'].split(os.sep) if len(pieces) >= 2: - piece = to_str(pieces[-2]) + piece = to_bytes(pieces[-2]) if base32.could_be_base32_encoded(piece): storage_index = base32.a2b(piece) uri_extension_hash = base32.a2b(unpacked["UEB_hash"]) @@ -221,7 +221,7 @@ def dump_SDMF_share(m, length, options): from allmydata.mutable.common import NeedMoreDataError from allmydata.util import base32, hashutil from allmydata.uri import SSKVerifierURI - from allmydata.util.encodingutil import quote_output, to_str + from allmydata.util.encodingutil import quote_output, to_bytes offset = m.DATA_OFFSET @@ -269,7 +269,7 @@ def dump_SDMF_share(m, length, options): # knowing the parent directory name to get it pieces = options['filename'].split(os.sep) if len(pieces) >= 2: - piece = to_str(pieces[-2]) + piece = to_bytes(pieces[-2]) if base32.could_be_base32_encoded(piece): storage_index = base32.a2b(piece) fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey) @@ -307,7 +307,7 @@ def dump_MDMF_share(m, length, options): from allmydata.mutable.layout import 
MDMFSlotReadProxy from allmydata.util import base32, hashutil from allmydata.uri import MDMFVerifierURI - from allmydata.util.encodingutil import quote_output, to_str + from allmydata.util.encodingutil import quote_output, to_bytes offset = m.DATA_OFFSET out = options.stdout @@ -363,7 +363,7 @@ def dump_MDMF_share(m, length, options): # knowing the parent directory name to get it pieces = options['filename'].split(os.sep) if len(pieces) >= 2: - piece = to_str(pieces[-2]) + piece = to_bytes(pieces[-2]) if base32.could_be_base32_encoded(piece): storage_index = base32.a2b(piece) fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey) diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py index 558c3d6d3..c63558eb1 100644 --- a/src/allmydata/scripts/tahoe_backup.py +++ b/src/allmydata/scripts/tahoe_backup.py @@ -11,7 +11,7 @@ from allmydata.scripts.common_http import do_http, HTTPError, format_http_error from allmydata.util import time_format from allmydata.scripts import backupdb from allmydata.util.encodingutil import listdir_unicode, quote_output, \ - quote_local_unicode_path, to_str, FilenameEncodingError, unicode_to_url + quote_local_unicode_path, to_bytes, FilenameEncodingError, unicode_to_url from allmydata.util.assertutil import precondition from allmydata.util.fileutil import abspath_expanduser_unicode, precondition_abspath @@ -47,7 +47,7 @@ def mkdir(contents, options): if resp.status < 200 or resp.status >= 300: raise HTTPError("Error during mkdir", resp) - dircap = to_str(resp.read().strip()) + dircap = to_bytes(resp.read().strip()) return dircap def put_child(dirurl, childname, childcap): diff --git a/src/allmydata/scripts/tahoe_cp.py b/src/allmydata/scripts/tahoe_cp.py index 5d0849c56..c90dca072 100644 --- a/src/allmydata/scripts/tahoe_cp.py +++ b/src/allmydata/scripts/tahoe_cp.py @@ -13,7 +13,7 @@ from allmydata import uri from allmydata.util import fileutil from allmydata.util.fileutil import 
abspath_expanduser_unicode, precondition_abspath from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, \ - quote_local_unicode_path, to_str + quote_local_unicode_path, to_bytes from allmydata.util.assertutil import precondition, _assert @@ -254,8 +254,8 @@ class TahoeDirectorySource(object): def init_from_parsed(self, parsed): nodetype, d = parsed - self.writecap = to_str(d.get("rw_uri")) - self.readcap = to_str(d.get("ro_uri")) + self.writecap = to_bytes(d.get("rw_uri")) + self.readcap = to_bytes(d.get("ro_uri")) self.mutable = d.get("mutable", False) # older nodes don't provide it self.children_d = dict( [(unicode(name),value) for (name,value) @@ -270,13 +270,13 @@ class TahoeDirectorySource(object): self.progressfunc("examining %d of %d" % (i+1, len(self.children_d))) if data[0] == "filenode": mutable = data[1].get("mutable", False) - writecap = to_str(data[1].get("rw_uri")) - readcap = to_str(data[1].get("ro_uri")) + writecap = to_bytes(data[1].get("rw_uri")) + readcap = to_bytes(data[1].get("ro_uri")) self.children[name] = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name) elif data[0] == "dirnode": - writecap = to_str(data[1].get("rw_uri")) - readcap = to_str(data[1].get("ro_uri")) + writecap = to_bytes(data[1].get("rw_uri")) + readcap = to_bytes(data[1].get("ro_uri")) if writecap and writecap in self.cache: child = self.cache[writecap] elif readcap and readcap in self.cache: @@ -324,8 +324,8 @@ class TahoeDirectoryTarget(object): def init_from_parsed(self, parsed): nodetype, d = parsed - self.writecap = to_str(d.get("rw_uri")) - self.readcap = to_str(d.get("ro_uri")) + self.writecap = to_bytes(d.get("rw_uri")) + self.readcap = to_bytes(d.get("ro_uri")) self.mutable = d.get("mutable", False) # older nodes don't provide it self.children_d = dict( [(unicode(name),value) for (name,value) @@ -365,8 +365,8 @@ class TahoeDirectoryTarget(object): self.progressfunc("examining %d of %d" % (i+1, len(self.children_d))) if 
data[0] == "filenode": mutable = data[1].get("mutable", False) - writecap = to_str(data[1].get("rw_uri")) - readcap = to_str(data[1].get("ro_uri")) + writecap = to_bytes(data[1].get("rw_uri")) + readcap = to_bytes(data[1].get("ro_uri")) url = None if self.writecap: url = self.nodeurl + "/".join(["uri", @@ -375,8 +375,8 @@ class TahoeDirectoryTarget(object): self.children[name] = TahoeFileTarget(self.nodeurl, mutable, writecap, readcap, url) elif data[0] == "dirnode": - writecap = to_str(data[1].get("rw_uri")) - readcap = to_str(data[1].get("ro_uri")) + writecap = to_bytes(data[1].get("rw_uri")) + readcap = to_bytes(data[1].get("ro_uri")) if writecap and writecap in self.cache: child = self.cache[writecap] elif readcap and readcap in self.cache: @@ -619,8 +619,8 @@ class Copier(object): self.progress) t.init_from_parsed(parsed) else: - writecap = to_str(d.get("rw_uri")) - readcap = to_str(d.get("ro_uri")) + writecap = to_bytes(d.get("rw_uri")) + readcap = to_bytes(d.get("ro_uri")) mutable = d.get("mutable", False) t = TahoeFileTarget(self.nodeurl, mutable, writecap, readcap, url) @@ -682,8 +682,8 @@ class Copier(object): else: if had_trailing_slash: raise FilenameWithTrailingSlashError(source_spec) - writecap = to_str(d.get("rw_uri")) - readcap = to_str(d.get("ro_uri")) + writecap = to_bytes(d.get("rw_uri")) + readcap = to_bytes(d.get("ro_uri")) mutable = d.get("mutable", False) # older nodes don't provide it t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name) return t diff --git a/src/allmydata/scripts/tahoe_ls.py b/src/allmydata/scripts/tahoe_ls.py index 1332da32a..2bfe16d27 100644 --- a/src/allmydata/scripts/tahoe_ls.py +++ b/src/allmydata/scripts/tahoe_ls.py @@ -5,7 +5,7 @@ import json from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error -from allmydata.util.encodingutil import unicode_to_output, quote_output, is_printable_ascii, to_str 
+from allmydata.util.encodingutil import unicode_to_output, quote_output, is_printable_ascii, to_bytes def list(options): nodeurl = options['node-url'] @@ -94,8 +94,8 @@ def list(options): mtime = child[1].get("metadata", {}).get('tahoe', {}).get("linkmotime") if not mtime: mtime = child[1]["metadata"].get("mtime") - rw_uri = to_str(child[1].get("rw_uri")) - ro_uri = to_str(child[1].get("ro_uri")) + rw_uri = to_bytes(child[1].get("rw_uri")) + ro_uri = to_bytes(child[1].get("ro_uri")) if ctime: # match for formatting that GNU 'ls' does if (now - ctime) > 6*30*24*60*60: diff --git a/src/allmydata/scripts/tahoe_mv.py b/src/allmydata/scripts/tahoe_mv.py index 4caaca928..7d13ea72a 100644 --- a/src/allmydata/scripts/tahoe_mv.py +++ b/src/allmydata/scripts/tahoe_mv.py @@ -6,7 +6,7 @@ import json from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error -from allmydata.util.encodingutil import to_str +from allmydata.util.encodingutil import to_bytes # this script is used for both 'mv' and 'ln' @@ -35,7 +35,7 @@ def mv(options, mode="move"): return 1 data = resp.read() nodetype, attrs = json.loads(data) - cap = to_str(attrs.get("rw_uri") or attrs["ro_uri"]) + cap = to_bytes(attrs.get("rw_uri") or attrs["ro_uri"]) # now get the target try: diff --git a/src/allmydata/test/cli/test_check.py b/src/allmydata/test/cli/test_check.py index a2476e6c7..85649e262 100644 --- a/src/allmydata/test/cli/test_check.py +++ b/src/allmydata/test/cli/test_check.py @@ -5,7 +5,7 @@ from six.moves import cStringIO as StringIO from allmydata import uri from allmydata.util import base32 -from allmydata.util.encodingutil import quote_output, to_str +from allmydata.util.encodingutil import quote_output, to_bytes from allmydata.mutable.publish import MutableData from allmydata.immutable import upload from allmydata.scripts import debug @@ -41,7 +41,7 @@ class Check(GridTestMixin, CLITestMixin, 
unittest.TestCase): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) data = json.loads(out) - self.failUnlessReallyEqual(to_str(data["summary"]), "Healthy") + self.failUnlessReallyEqual(to_bytes(data["summary"]), "Healthy") self.failUnlessReallyEqual(data["results"]["healthy"], True) d.addCallback(_check2) diff --git a/src/allmydata/test/cli/test_cp.py b/src/allmydata/test/cli/test_cp.py index 7b076f327..59331029b 100644 --- a/src/allmydata/test/cli/test_cp.py +++ b/src/allmydata/test/cli/test_cp.py @@ -8,7 +8,7 @@ from twisted.internet import defer from allmydata.scripts import cli from allmydata.util import fileutil from allmydata.util.encodingutil import (quote_output, get_io_encoding, - unicode_to_output, to_str) + unicode_to_output, to_bytes) from allmydata.util.assertutil import _assert from ..no_network import GridTestMixin from .common import CLITestMixin @@ -272,9 +272,9 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnless(data['mutable']) self.failUnlessIn("rw_uri", data) - self.rw_uri = to_str(data["rw_uri"]) + self.rw_uri = to_bytes(data["rw_uri"]) self.failUnlessIn("ro_uri", data) - self.ro_uri = to_str(data["ro_uri"]) + self.ro_uri = to_bytes(data["ro_uri"]) d.addCallback(_get_test_txt_uris) # Now make a new file to copy in place of test.txt. 
@@ -306,9 +306,9 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnless(data['mutable']) self.failUnlessIn("ro_uri", data) - self.failUnlessEqual(to_str(data["ro_uri"]), self.ro_uri) + self.failUnlessEqual(to_bytes(data["ro_uri"]), self.ro_uri) self.failUnlessIn("rw_uri", data) - self.failUnlessEqual(to_str(data["rw_uri"]), self.rw_uri) + self.failUnlessEqual(to_bytes(data["rw_uri"]), self.rw_uri) d.addCallback(_check_json) # and, finally, doing a GET directly on one of the old uris @@ -381,7 +381,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failIf(childdata['mutable']) self.failUnlessIn("ro_uri", childdata) uri_key = "ro_uri" - self.childuris[k] = to_str(childdata[uri_key]) + self.childuris[k] = to_bytes(childdata[uri_key]) d.addCallback(_process_directory_json) # Now build a local directory to copy into place, like the following: # test2/ @@ -410,11 +410,11 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): if "mutable" in fn: self.failUnless(data['mutable']) self.failUnlessIn("rw_uri", data) - self.failUnlessEqual(to_str(data["rw_uri"]), self.childuris[fn]) + self.failUnlessEqual(to_bytes(data["rw_uri"]), self.childuris[fn]) else: self.failIf(data['mutable']) self.failUnlessIn("ro_uri", data) - self.failIfEqual(to_str(data["ro_uri"]), self.childuris[fn]) + self.failIfEqual(to_bytes(data["ro_uri"]), self.childuris[fn]) for fn in ("mutable1", "mutable2"): d.addCallback(lambda ignored, fn=fn: @@ -456,7 +456,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessEqual(filetype, "filenode") self.failIf(data['mutable']) self.failUnlessIn("ro_uri", data) - self.failUnlessEqual(to_str(data["ro_uri"]), self.childuris["imm2"]) + self.failUnlessEqual(to_bytes(data["ro_uri"]), self.childuris["imm2"]) d.addCallback(_process_imm2_json) return d @@ -497,7 +497,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessEqual(filetype, "filenode") self.failUnless(data['mutable']) 
self.failUnlessIn("ro_uri", data) - self._test_read_uri = to_str(data["ro_uri"]) + self._test_read_uri = to_bytes(data["ro_uri"]) d.addCallback(_process_test_json) # Now we'll link the readonly URI into the tahoe: alias. d.addCallback(lambda ignored: @@ -521,7 +521,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessEqual(testtype, "filenode") self.failUnless(testdata['mutable']) self.failUnlessIn("ro_uri", testdata) - self.failUnlessEqual(to_str(testdata["ro_uri"]), self._test_read_uri) + self.failUnlessEqual(to_bytes(testdata["ro_uri"]), self._test_read_uri) self.failIfIn("rw_uri", testdata) d.addCallback(_process_tahoe_json) # Okay, now we're going to try uploading another mutable file in @@ -589,7 +589,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessEqual(file2type, "filenode") self.failUnless(file2data['mutable']) self.failUnlessIn("ro_uri", file2data) - self.failUnlessEqual(to_str(file2data["ro_uri"]), self._test_read_uri) + self.failUnlessEqual(to_bytes(file2data["ro_uri"]), self._test_read_uri) self.failIfIn("rw_uri", file2data) d.addCallback(_got_testdir_json) return d diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index cf72fc9ac..208b0f98d 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -84,7 +84,7 @@ from allmydata.util import encodingutil, fileutil from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ - get_io_encoding, get_filesystem_encoding, to_str, from_utf8_or_none, _reload, \ + get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ unicode_to_argv from twisted.python import usage @@ -600,12 +600,12 @@ class 
OpenBSD(EncodingUtil, unittest.TestCase): class TestToFromStr(ReallyEqualMixin, unittest.TestCase): - def test_to_str(self): - self.failUnlessReallyEqual(to_str(b"foo"), b"foo") - self.failUnlessReallyEqual(to_str(b"lumi\xc3\xa8re"), b"lumi\xc3\xa8re") - self.failUnlessReallyEqual(to_str(b"\xFF"), b"\xFF") # passes through invalid UTF-8 -- is this what we want? - self.failUnlessReallyEqual(to_str(u"lumi\u00E8re"), b"lumi\xc3\xa8re") - self.failUnlessReallyEqual(to_str(None), None) + def test_to_bytes(self): + self.failUnlessReallyEqual(to_bytes(b"foo"), b"foo") + self.failUnlessReallyEqual(to_bytes(b"lumi\xc3\xa8re"), b"lumi\xc3\xa8re") + self.failUnlessReallyEqual(to_bytes(b"\xFF"), b"\xFF") # passes through invalid UTF-8 -- is this what we want? + self.failUnlessReallyEqual(to_bytes(u"lumi\u00E8re"), b"lumi\xc3\xa8re") + self.failUnlessReallyEqual(to_bytes(None), None) def test_from_utf8_or_none(self): self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo") diff --git a/src/allmydata/test/web/test_grid.py b/src/allmydata/test/web/test_grid.py index 73c354567..2a52d98e5 100644 --- a/src/allmydata/test/web/test_grid.py +++ b/src/allmydata/test/web/test_grid.py @@ -10,7 +10,7 @@ from twisted.web import resource from twisted.trial import unittest from allmydata import uri, dirnode from allmydata.util import base32 -from allmydata.util.encodingutil import to_str +from allmydata.util.encodingutil import to_bytes from allmydata.util.consumer import download_to_data from allmydata.util.netstring import split_netstring from allmydata.unknown import UnknownNode @@ -367,13 +367,13 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi f = data[1]["children"][name] self.failUnlessEqual(f[0], "unknown") if expect_rw_uri: - self.failUnlessReallyEqual(to_str(f[1]["rw_uri"]), unknown_rwcap, data) + self.failUnlessReallyEqual(to_bytes(f[1]["rw_uri"]), unknown_rwcap, data) else: self.failIfIn("rw_uri", f[1]) if immutable: - 
self.failUnlessReallyEqual(to_str(f[1]["ro_uri"]), unknown_immcap, data) + self.failUnlessReallyEqual(to_bytes(f[1]["ro_uri"]), unknown_immcap, data) else: - self.failUnlessReallyEqual(to_str(f[1]["ro_uri"]), unknown_rocap, data) + self.failUnlessReallyEqual(to_bytes(f[1]["ro_uri"]), unknown_rocap, data) self.failUnlessIn("metadata", f[1]) d.addCallback(_check_directory_json, expect_rw_uri=not immutable) @@ -406,18 +406,18 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi data = json.loads(res) self.failUnlessEqual(data[0], "unknown") if expect_rw_uri: - self.failUnlessReallyEqual(to_str(data[1]["rw_uri"]), unknown_rwcap, data) + self.failUnlessReallyEqual(to_bytes(data[1]["rw_uri"]), unknown_rwcap, data) else: self.failIfIn("rw_uri", data[1]) if immutable: - self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), unknown_immcap, data) + self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), unknown_immcap, data) self.failUnlessReallyEqual(data[1]["mutable"], False) elif expect_rw_uri: - self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), unknown_rocap, data) + self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), unknown_rocap, data) self.failUnlessReallyEqual(data[1]["mutable"], True) else: - self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), unknown_rocap, data) + self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), unknown_rocap, data) self.failIfIn("mutable", data[1]) # TODO: check metadata contents @@ -581,7 +581,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi ll_type, ll_data = listed_children[u"lonely"] self.failUnlessEqual(ll_type, "filenode") self.failIfIn("rw_uri", ll_data) - self.failUnlessReallyEqual(to_str(ll_data["ro_uri"]), lonely_uri) + self.failUnlessReallyEqual(to_bytes(ll_data["ro_uri"]), lonely_uri) d.addCallback(_check_json) return d @@ -643,14 +643,14 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi u0 = units[0] 
self.failUnlessEqual(u0["path"], []) self.failUnlessEqual(u0["type"], "directory") - self.failUnlessReallyEqual(to_str(u0["cap"]), self.rootnode.get_uri()) + self.failUnlessReallyEqual(to_bytes(u0["cap"]), self.rootnode.get_uri()) u0cr = u0["check-results"] self.failUnlessReallyEqual(u0cr["results"]["count-happiness"], 10) self.failUnlessReallyEqual(u0cr["results"]["count-shares-good"], 10) ugood = [u for u in units if u["type"] == "file" and u["path"] == [u"good"]][0] - self.failUnlessReallyEqual(to_str(ugood["cap"]), self.uris["good"]) + self.failUnlessReallyEqual(to_bytes(ugood["cap"]), self.uris["good"]) ugoodcr = ugood["check-results"] self.failUnlessReallyEqual(ugoodcr["results"]["count-happiness"], 10) self.failUnlessReallyEqual(ugoodcr["results"]["count-shares-good"], 10) @@ -672,7 +672,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi self.failUnlessEqual(units[-1]["type"], "stats") first = units[0] self.failUnlessEqual(first["path"], []) - self.failUnlessEqual(to_str(first["cap"]), self.rootnode.get_uri()) + self.failUnlessEqual(to_bytes(first["cap"]), self.rootnode.get_uri()) self.failUnlessEqual(first["type"], "directory") stats = units[-1]["stats"] self.failUnlessReallyEqual(stats["count-immutable-files"], 2) @@ -826,7 +826,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi u0 = units[0] self.failUnlessEqual(u0["path"], []) self.failUnlessEqual(u0["type"], "directory") - self.failUnlessReallyEqual(to_str(u0["cap"]), self.rootnode.get_uri()) + self.failUnlessReallyEqual(to_bytes(u0["cap"]), self.rootnode.get_uri()) u0crr = u0["check-and-repair-results"] self.failUnlessReallyEqual(u0crr["repair-attempted"], False) self.failUnlessReallyEqual(u0crr["pre-repair-results"]["results"]["count-happiness"], 10) @@ -834,7 +834,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi ugood = [u for u in units if u["type"] == "file" and u["path"] == [u"good"]][0] - 
self.failUnlessEqual(to_str(ugood["cap"]), self.uris["good"]) + self.failUnlessEqual(to_bytes(ugood["cap"]), self.uris["good"]) ugoodcrr = ugood["check-and-repair-results"] self.failUnlessReallyEqual(ugoodcrr["repair-attempted"], False) self.failUnlessReallyEqual(ugoodcrr["pre-repair-results"]["results"]["count-happiness"], 10) @@ -842,7 +842,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi usick = [u for u in units if u["type"] == "file" and u["path"] == [u"sick"]][0] - self.failUnlessReallyEqual(to_str(usick["cap"]), self.uris["sick"]) + self.failUnlessReallyEqual(to_bytes(usick["cap"]), self.uris["sick"]) usickcrr = usick["check-and-repair-results"] self.failUnlessReallyEqual(usickcrr["repair-attempted"], True) self.failUnlessReallyEqual(usickcrr["repair-successful"], True) diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py index b4d604ed4..0dc12a4c5 100644 --- a/src/allmydata/test/web/test_web.py +++ b/src/allmydata/test/web/test_web.py @@ -36,7 +36,7 @@ from allmydata.nodemaker import NodeMaker from allmydata.web.common import WebError, MultiFormatPage from allmydata.util import fileutil, base32, hashutil from allmydata.util.consumer import download_to_data -from allmydata.util.encodingutil import to_str +from allmydata.util.encodingutil import to_bytes from ...util.connection_status import ConnectionStatus from ..common import ( EMPTY_CLIENT_CONFIG, @@ -457,8 +457,8 @@ class WebMixin(TimezoneMixin): self.failUnless(isinstance(data[1], dict)) self.failIf(data[1]["mutable"]) self.failIfIn("rw_uri", data[1]) # immutable - self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), self._bar_txt_uri) - self.failUnlessReallyEqual(to_str(data[1]["verify_uri"]), self._bar_txt_verifycap) + self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), self._bar_txt_uri) + self.failUnlessReallyEqual(to_bytes(data[1]["verify_uri"]), self._bar_txt_verifycap) self.failUnlessReallyEqual(data[1]["size"], 
len(self.BAR_CONTENTS)) def failUnlessIsQuuxJSON(self, res, readonly=False): @@ -487,9 +487,9 @@ class WebMixin(TimezoneMixin): self.failUnless(isinstance(data[1], dict)) self.failUnless(data[1]["mutable"]) self.failUnlessIn("rw_uri", data[1]) # mutable - self.failUnlessReallyEqual(to_str(data[1]["rw_uri"]), self._foo_uri) - self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), self._foo_readonly_uri) - self.failUnlessReallyEqual(to_str(data[1]["verify_uri"]), self._foo_verifycap) + self.failUnlessReallyEqual(to_bytes(data[1]["rw_uri"]), self._foo_uri) + self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), self._foo_readonly_uri) + self.failUnlessReallyEqual(to_bytes(data[1]["verify_uri"]), self._foo_verifycap) kidnames = sorted([unicode(n) for n in data[1]["children"]]) self.failUnlessEqual(kidnames, @@ -506,19 +506,19 @@ class WebMixin(TimezoneMixin): self.failUnlessIn("linkmotime", tahoe_md) self.failUnlessEqual(kids[u"bar.txt"][0], "filenode") self.failUnlessReallyEqual(kids[u"bar.txt"][1]["size"], len(self.BAR_CONTENTS)) - self.failUnlessReallyEqual(to_str(kids[u"bar.txt"][1]["ro_uri"]), self._bar_txt_uri) - self.failUnlessReallyEqual(to_str(kids[u"bar.txt"][1]["verify_uri"]), + self.failUnlessReallyEqual(to_bytes(kids[u"bar.txt"][1]["ro_uri"]), self._bar_txt_uri) + self.failUnlessReallyEqual(to_bytes(kids[u"bar.txt"][1]["verify_uri"]), self._bar_txt_verifycap) self.failUnlessIn("metadata", kids[u"bar.txt"][1]) self.failUnlessIn("tahoe", kids[u"bar.txt"][1]["metadata"]) self.failUnlessReallyEqual(kids[u"bar.txt"][1]["metadata"]["tahoe"]["linkcrtime"], self._bar_txt_metadata["tahoe"]["linkcrtime"]) - self.failUnlessReallyEqual(to_str(kids[u"n\u00fc.txt"][1]["ro_uri"]), + self.failUnlessReallyEqual(to_bytes(kids[u"n\u00fc.txt"][1]["ro_uri"]), self._bar_txt_uri) self.failUnlessIn("quux.txt", kids) - self.failUnlessReallyEqual(to_str(kids[u"quux.txt"][1]["rw_uri"]), + self.failUnlessReallyEqual(to_bytes(kids[u"quux.txt"][1]["rw_uri"]), self._quux_txt_uri) - 
self.failUnlessReallyEqual(to_str(kids[u"quux.txt"][1]["ro_uri"]), + self.failUnlessReallyEqual(to_bytes(kids[u"quux.txt"][1]["ro_uri"]), self._quux_txt_readonly_uri) @inlineCallbacks @@ -2181,7 +2181,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi got = {} for (path_list, cap) in data: got[tuple(path_list)] = cap - self.failUnlessReallyEqual(to_str(got[(u"sub",)]), self._sub_uri) + self.failUnlessReallyEqual(to_bytes(got[(u"sub",)]), self._sub_uri) self.failUnlessIn((u"sub", u"baz.txt"), got) self.failUnlessIn("finished", res) self.failUnlessIn("origin", res) @@ -2266,9 +2266,9 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi self.failUnlessEqual(units[-1]["type"], "stats") first = units[0] self.failUnlessEqual(first["path"], []) - self.failUnlessReallyEqual(to_str(first["cap"]), self._foo_uri) + self.failUnlessReallyEqual(to_bytes(first["cap"]), self._foo_uri) self.failUnlessEqual(first["type"], "directory") - baz = [u for u in units[:-1] if to_str(u["cap"]) == self._baz_file_uri][0] + baz = [u for u in units[:-1] if to_bytes(u["cap"]) == self._baz_file_uri][0] self.failUnlessEqual(baz["path"], ["sub", "baz.txt"]) self.failIfEqual(baz["storage-index"], None) self.failIfEqual(baz["verifycap"], None) @@ -2281,14 +2281,14 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi def test_GET_DIRURL_uri(self): d = self.GET(self.public_url + "/foo?t=uri") def _check(res): - self.failUnlessReallyEqual(to_str(res), self._foo_uri) + self.failUnlessReallyEqual(to_bytes(res), self._foo_uri) d.addCallback(_check) return d def test_GET_DIRURL_readonly_uri(self): d = self.GET(self.public_url + "/foo?t=readonly-uri") def _check(res): - self.failUnlessReallyEqual(to_str(res), self._foo_readonly_uri) + self.failUnlessReallyEqual(to_bytes(res), self._foo_readonly_uri) d.addCallback(_check) return d @@ -2950,9 +2950,9 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, 
testutil.ReallyEqualMixi new_json = children[u"new.txt"] self.failUnlessEqual(new_json[0], "filenode") self.failUnless(new_json[1]["mutable"]) - self.failUnlessReallyEqual(to_str(new_json[1]["rw_uri"]), self._mutable_uri) + self.failUnlessReallyEqual(to_bytes(new_json[1]["rw_uri"]), self._mutable_uri) ro_uri = self._mutable_node.get_readonly().to_string() - self.failUnlessReallyEqual(to_str(new_json[1]["ro_uri"]), ro_uri) + self.failUnlessReallyEqual(to_bytes(new_json[1]["ro_uri"]), ro_uri) d.addCallback(_check_page_json) # and the JSON form of the file @@ -2962,9 +2962,9 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi parsed = json.loads(res) self.failUnlessEqual(parsed[0], "filenode") self.failUnless(parsed[1]["mutable"]) - self.failUnlessReallyEqual(to_str(parsed[1]["rw_uri"]), self._mutable_uri) + self.failUnlessReallyEqual(to_bytes(parsed[1]["rw_uri"]), self._mutable_uri) ro_uri = self._mutable_node.get_readonly().to_string() - self.failUnlessReallyEqual(to_str(parsed[1]["ro_uri"]), ro_uri) + self.failUnlessReallyEqual(to_bytes(parsed[1]["ro_uri"]), ro_uri) d.addCallback(_check_file_json) # and look at t=uri and t=readonly-uri diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 07416cc93..24e420496 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -155,11 +155,15 @@ def unicode_to_url(s): # According to RFC 2718, non-ascii characters in URLs must be UTF-8 encoded. # FIXME - return to_str(s) + return to_bytes(s) #precondition(isinstance(s, unicode), s) #return s.encode('utf-8') -def to_str(s): # TODO rename to to_bytes +def to_bytes(s): + """Convert unicode to bytes. + + None and bytes are passed through unchanged. 
+ """ if s is None or isinstance(s, bytes): return s return s.encode('utf-8') @@ -274,7 +278,7 @@ def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None): return b'"%s"' % (escaped.encode(encoding or io_encoding, 'backslashreplace'),) def quote_path(path, quotemarks=True): - return quote_output(b"/".join(map(to_str, path)), quotemarks=quotemarks, quote_newlines=True) + return quote_output(b"/".join(map(to_bytes, path)), quotemarks=quotemarks, quote_newlines=True) def quote_local_unicode_path(path, quotemarks=True): precondition(isinstance(path, unicode), path) diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index a930fd2b1..788557480 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -18,7 +18,7 @@ from allmydata.mutable.common import UnrecoverableFileError from allmydata.util import abbreviate from allmydata.util.hashutil import timing_safe_compare from allmydata.util.time_format import format_time, format_delta -from allmydata.util.encodingutil import to_str, quote_output +from allmydata.util.encodingutil import to_bytes, quote_output def get_filenode_metadata(filenode): @@ -133,8 +133,8 @@ def convert_children_json(nodemaker, children_json): data = json.loads(children_json) for (namex, (ctype, propdict)) in data.iteritems(): namex = unicode(namex) - writecap = to_str(propdict.get("rw_uri")) - readcap = to_str(propdict.get("ro_uri")) + writecap = to_bytes(propdict.get("rw_uri")) + readcap = to_bytes(propdict.get("ro_uri")) metadata = propdict.get("metadata", {}) # name= argument is just for error reporting childnode = nodemaker.create_from_cap(writecap, readcap, name=namex) diff --git a/src/allmydata/web/directory.py b/src/allmydata/web/directory.py index a5ba3bed5..9fdecbcb4 100644 --- a/src/allmydata/web/directory.py +++ b/src/allmydata/web/directory.py @@ -22,7 +22,7 @@ from twisted.python.filepath import FilePath from allmydata.util import base32 from allmydata.util.encodingutil import ( - 
to_str, + to_bytes, quote_output, ) from allmydata.uri import ( @@ -484,7 +484,7 @@ class DirectoryNodeHandler(ReplaceMeMixin, Resource, object): to_dir = to_dir.decode(charset) assert isinstance(to_dir, unicode) to_path = to_dir.split(u"/") - to_root = self.client.nodemaker.create_from_cap(to_str(to_path[0])) + to_root = self.client.nodemaker.create_from_cap(to_bytes(to_path[0])) if not IDirectoryNode.providedBy(to_root): raise WebError("to_dir is not a directory", http.BAD_REQUEST) d = to_root.get_child_at_path(to_path[1:]) From 03ed0fd66fba26d622167000dc3f41717e8d7e00 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 17 Aug 2020 11:29:09 -0400 Subject: [PATCH 2/8] Another function that should be a no-op on Python 3. --- src/allmydata/test/test_encodingutil.py | 8 +++++++- src/allmydata/util/encodingutil.py | 7 +++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 208b0f98d..61e90c068 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -144,6 +144,7 @@ class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): argv_to_unicode, lumiere_nfc.encode('latin1')) + @skipIf(PY3, "Python 2 only.") def test_unicode_to_output(self): encodingutil.io_encoding = 'koi8-r' self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) @@ -228,7 +229,8 @@ class EncodingUtil(ReallyEqualMixin): def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") - def test_unicode_to_output(self): + @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") + def test_unicode_to_output_py2(self): if 'argv' not in dir(self): return @@ -239,6 +241,10 @@ class EncodingUtil(ReallyEqualMixin): _reload() self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv) + @skipIf(PY2, "Python 3 only.") + def test_unicode_to_output_py3(self): + 
self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) + @skipIf(PY3, "Python 2 only.") def test_unicode_to_argv_py2(self): """unicode_to_argv() converts to bytes on Python 2.""" diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 24e420496..bfcbb13d9 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -21,6 +21,7 @@ from past.builtins import unicode import sys, os, re, locale import unicodedata +import warnings from allmydata.util.assertutil import precondition, _assert from twisted.python import usage @@ -140,6 +141,8 @@ def unicode_to_argv(s, mangle=False): """ precondition(isinstance(s, unicode), s) if PY3: + warnings.warn("This will be unnecessary once Python 2 is dropped.", + DeprecationWarning) return s if mangle and sys.platform == "win32": @@ -185,6 +188,10 @@ def unicode_to_output(s): Encode an unicode object for representation on stdout or stderr. """ precondition(isinstance(s, unicode), s) + if PY3: + warnings.warn("This will be unnecessary once Python 2 is dropped.", + DeprecationWarning) + return s try: out = s.encode(io_encoding) From af5e9eaf67f7af6959b63b519b8197dfc06618ba Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 17 Aug 2020 11:29:27 -0400 Subject: [PATCH 3/8] A better name. --- src/allmydata/util/encodingutil.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index bfcbb13d9..1d41f337a 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -224,7 +224,7 @@ def _unicode_escape(m, quote_newlines): else: return u'\\x%02x' % (codepoint,) -def _str_escape(m, quote_newlines): # TODO rename to _bytes_escape +def _bytes_escape(m, quote_newlines): """ Takes a re match on bytes, the result is escaped bytes of group(0). 
""" @@ -268,7 +268,7 @@ def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None): try: s = s.decode('utf-8') except UnicodeDecodeError: - return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _str_escape(m, quote_newlines), s),) + return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _bytes_escape(m, quote_newlines), s),) must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE if must_double_quote.search(s) is None: From f95f9c481eb9a9d3a728e92de7df32e47710e7a1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 17 Aug 2020 12:04:52 -0400 Subject: [PATCH 4/8] Explanation. --- src/allmydata/util/encodingutil.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 1d41f337a..56c6deb88 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -186,6 +186,9 @@ def is_printable_ascii(s): def unicode_to_output(s): """ Encode an unicode object for representation on stdout or stderr. + + On Python 3 just returns the string unchanged, since it is not necessary to + encode in any way. """ precondition(isinstance(s, unicode), s) if PY3: From 97c3be05093f6e62f8ac8f5b8efa7c9505d103d1 Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 17 Aug 2020 13:29:49 -0400 Subject: [PATCH 5/8] Make quote_* and friends return unicode. 
--- src/allmydata/test/test_encodingutil.py | 55 ++++++++++++++++--------- src/allmydata/util/encodingutil.py | 53 ++++++++++++++++-------- 2 files changed, 71 insertions(+), 37 deletions(-) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 61e90c068..376bd6ec6 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -355,6 +355,8 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): _reload() def _check(self, inp, out, enc, optional_quotes, quote_newlines): + if PY3 and isinstance(out, bytes): + out = out.decode(enc or encodingutil.io_encoding) out2 = out if optional_quotes: out2 = out2[1:-1] @@ -382,6 +384,9 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): def _test_quote_output_all(self, enc): def check(inp, out, optional_quotes=False, quote_newlines=None): + if PY3: + # Result is always Unicode on Python 3 + out = out.decode("ascii") self._check(inp, out, enc, optional_quotes, quote_newlines) # optional single quotes @@ -444,7 +449,10 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): def test_quote_output_utf8(self, enc='utf-8'): def check(inp, out, optional_quotes=False, quote_newlines=None): - self._check(inp, out.encode('utf-8'), enc, optional_quotes, quote_newlines) + if PY2: + # On Python 3 output is always Unicode: + out = out.encode('utf-8') + self._check(inp, out, enc, optional_quotes, quote_newlines) self._test_quote_output_all(enc) check(u"\u2621", u"'\u2621'", True) @@ -469,43 +477,50 @@ def win32_other(win32, other): return win32 if sys.platform == "win32" else other class QuotePaths(ReallyEqualMixin, unittest.TestCase): - def test_quote_path(self): - self.failUnlessReallyEqual(quote_path([u'foo', u'bar']), b"'foo/bar'") - self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'") - self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar") - 
self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"') - self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"') - self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"') - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"), + def assertPathsEqual(self, actual, expected): + if PY3: + # On Python 3, results should be unicode: + expected = expected.decode("ascii") + self.failUnlessReallyEqual(actual, expected) + + def test_quote_path(self): + self.assertPathsEqual(quote_path([u'foo', u'bar']), b"'foo/bar'") + self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'") + self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar") + self.assertPathsEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"') + self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"') + self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"') + + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"), win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True), + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True), win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False), + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False), win32_other(b"C:\\foo", b"\\\\?\\C:\\foo")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"), + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"), win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True), + 
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True), win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False), + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False), win32_other(b"\\\\foo\\bar", b"\\\\?\\UNC\\foo\\bar")) def test_quote_filepath(self): foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar')) - self.failUnlessReallyEqual(quote_filepath(foo_bar_fp), + self.assertPathsEqual(quote_filepath(foo_bar_fp), win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'")) - self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=True), + self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=True), win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'")) - self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=False), + self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=False), win32_other(b"C:\\foo\\bar", b"/foo/bar")) if sys.platform == "win32": foo_longfp = FilePath(u'\\\\?\\C:\\foo') - self.failUnlessReallyEqual(quote_filepath(foo_longfp), + self.assertPathsEqual(quote_filepath(foo_longfp), b"'C:\\foo'") - self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=True), + self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=True), b"'C:\\foo'") - self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=False), + self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=False), b"C:\\foo") diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 56c6deb88..35ee7f64c 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -262,30 +262,49 @@ def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None): Python-compatible backslash escaping is used. If not explicitly given, quote_newlines is True when quotemarks is True. + + On Python 3, returns Unicode strings. 
""" precondition(isinstance(s, (bytes, unicode)), s) + encoding = encoding or io_encoding + if quote_newlines is None: quote_newlines = quotemarks - if isinstance(s, bytes): - try: - s = s.decode('utf-8') - except UnicodeDecodeError: - return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _bytes_escape(m, quote_newlines), s),) + def _encode(s): + if isinstance(s, bytes): + try: + s = s.decode('utf-8') + except UnicodeDecodeError: + return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _bytes_escape(m, quote_newlines), s),) - must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE - if must_double_quote.search(s) is None: - try: - out = s.encode(encoding or io_encoding) - if quotemarks or out.startswith(b'"'): - return b"'%s'" % (out,) - else: - return out - except (UnicodeDecodeError, UnicodeEncodeError): - pass + must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE + if must_double_quote.search(s) is None: + try: + out = s.encode(encoding) + if quotemarks or out.startswith(b'"'): + return b"'%s'" % (out,) + else: + return out + except (UnicodeDecodeError, UnicodeEncodeError): + pass + + escaped = ESCAPABLE_UNICODE.sub(lambda m: _unicode_escape(m, quote_newlines), s) + return b'"%s"' % (escaped.encode(encoding, 'backslashreplace'),) + + result = _encode(s) + if PY3: + # On Python half of what this function does is unnecessary, since + # output is always Unicode. To ensure no encode errors, one can do: + # + # sys.stdout.reconfigure(encoding=sys.stdout.encoding, errors="backslashreplace") + # + # Although the problem is that doesn't work in Python 3.6, only 3.7 or + # later... For now not thinking about it, just returning unicode since + # that is the right thing to do on Python 3. 
+ result = result.decode(encoding) + return result - escaped = ESCAPABLE_UNICODE.sub(lambda m: _unicode_escape(m, quote_newlines), s) - return b'"%s"' % (escaped.encode(encoding or io_encoding, 'backslashreplace'),) def quote_path(path, quotemarks=True): return quote_output(b"/".join(map(to_bytes, path)), quotemarks=quotemarks, quote_newlines=True) From 01ba0d0316860f18a7cf4b3994c971b5893aaf6f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Mon, 17 Aug 2020 13:30:12 -0400 Subject: [PATCH 6/8] News file. --- newsfragments/3378.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/3378.minor diff --git a/newsfragments/3378.minor b/newsfragments/3378.minor new file mode 100644 index 000000000..e69de29bb From 5fc95d569fafb29556782f89bb06881aac9a529f Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 20 Aug 2020 12:39:38 -0400 Subject: [PATCH 7/8] Improve explanations. --- src/allmydata/util/encodingutil.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 35ee7f64c..17a7a2f38 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -84,9 +84,12 @@ def _reload(): # Despite the Unicode-mode FilePath support added to Twisted in # , we can't yet use - # Unicode-mode FilePaths with INotify on non-Windows platforms - # due to . Supposedly - # 7928 is fixed, though... + # Unicode-mode FilePaths with INotify on non-Windows platforms due to + # . Supposedly 7928 is fixed, + # though... and Tahoe-LAFS doesn't use inotify anymore! + # + # In the interest of not breaking anything, this logic is unchanged for + # Python 2, but on Python 3 the paths are always unicode, like it or not. use_unicode_filepath = PY3 or sys.platform == "win32" _reload() @@ -187,8 +190,8 @@ def unicode_to_output(s): """ Encode an unicode object for representation on stdout or stderr. 
- On Python 3 just returns the string unchanged, since it is not necessary to - encode in any way. + On Python 3 just returns the unicode string unchanged, since encoding is + the responsibility of stdout/stderr; they expect Unicode by default. """ precondition(isinstance(s, unicode), s) if PY3: @@ -294,8 +297,9 @@ def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None): result = _encode(s) if PY3: - # On Python half of what this function does is unnecessary, since - # output is always Unicode. To ensure no encode errors, one can do: + # On Python 3 half of what this function does is unnecessary, since + # sys.stdout typically expects Unicode. To ensure no encode errors, one + # can do: # # sys.stdout.reconfigure(encoding=sys.stdout.encoding, errors="backslashreplace") # From eee3978243f5534e24b07bbd1a758094ac15973c Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Thu, 20 Aug 2020 13:34:00 -0400 Subject: [PATCH 8/8] Explain a bit better. --- src/allmydata/test/test_crawler.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index 38bc8dcc6..1ed217251 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -11,7 +11,9 @@ from __future__ import unicode_literals from future.utils import PY2, PY3 if PY2: - # Don't use future bytes, since it breaks tests. + # Don't use future bytes, since it breaks tests. No further work is + # needed; once we're only on Python 3 we'll be deleting these future imports + # anyway, and tests pass just fine on Python 3. from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, str, max, min # noqa: F401 import time