diff --git a/.circleci/config.yml b/.circleci/config.yml index 9f7381f33..c15eb1746 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -508,6 +508,7 @@ jobs: environment: DISTRO: "ubuntu" TAG: "20.04" + PYTHON_VERSION: "2.7" build-image-centos-8: diff --git a/newsfragments/3367.minor b/newsfragments/3367.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3378.minor b/newsfragments/3378.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3386.minor b/newsfragments/3386.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3388.minor b/newsfragments/3388.minor new file mode 100644 index 000000000..e69de29bb diff --git a/newsfragments/3389.minor b/newsfragments/3389.minor new file mode 100644 index 000000000..e69de29bb diff --git a/setup.py b/setup.py index a35023b8b..db5a5490d 100644 --- a/setup.py +++ b/setup.py @@ -117,7 +117,8 @@ install_requires = [ "eliot ~= 1.7", # A great way to define types of values. - "attrs >= 18.2.0", + # XXX: drop the upper bound: https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3390 + "attrs >= 18.2.0, < 20", # WebSocket library for twisted and asyncio "autobahn >= 19.5.2", diff --git a/src/allmydata/scripts/backupdb.py b/src/allmydata/scripts/backupdb.py index 234f8524a..d188eec3c 100644 --- a/src/allmydata/scripts/backupdb.py +++ b/src/allmydata/scripts/backupdb.py @@ -6,7 +6,7 @@ from allmydata.util.netstring import netstring from allmydata.util.hashutil import backupdb_dirhash from allmydata.util import base32 from allmydata.util.fileutil import abspath_expanduser_unicode -from allmydata.util.encodingutil import to_str +from allmydata.util.encodingutil import to_bytes from allmydata.util.dbutil import get_db, DBError @@ -218,7 +218,7 @@ class BackupDB_v2(object): probability = min(max(probability, 0.0), 1.0) should_check = bool(random.random() < probability) - return FileResult(self, to_str(filecap), should_check, + return FileResult(self, to_bytes(filecap), should_check, path, mtime, ctime, size) def get_or_allocate_fileid_for_cap(self, filecap): @@ -321,7 +321,7 @@ class BackupDB_v2(object): probability = min(max(probability, 0.0), 1.0) should_check = bool(random.random() < probability) - return DirectoryResult(self, dirhash_s, to_str(dircap), should_check) + return DirectoryResult(self, dirhash_s, to_bytes(dircap), should_check) def did_create_directory(self, dircap, dirhash): now = time.time() diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index fff58caff..789218f8b 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -63,7 +63,7 @@ def dump_immutable_chk_share(f, out, options): from allmydata import uri from allmydata.util import base32 from allmydata.immutable.layout import ReadBucketProxy - from allmydata.util.encodingutil import quote_output, to_str + from allmydata.util.encodingutil import quote_output, to_bytes # use a ReadBucketProxy to parse the bucket and find the uri extension bp = ReadBucketProxy(None, None, '') @@ -109,7 +109,7 @@ def dump_immutable_chk_share(f, out, options): # knowing the parent directory name to get it pieces = options['filename'].split(os.sep) if len(pieces) >= 2: - piece = to_str(pieces[-2]) + piece = to_bytes(pieces[-2]) if base32.could_be_base32_encoded(piece): storage_index = base32.a2b(piece) uri_extension_hash = base32.a2b(unpacked["UEB_hash"]) @@ -221,7 +221,7 @@ def dump_SDMF_share(m, length, options): from allmydata.mutable.common import NeedMoreDataError 
from allmydata.util import base32, hashutil from allmydata.uri import SSKVerifierURI - from allmydata.util.encodingutil import quote_output, to_str + from allmydata.util.encodingutil import quote_output, to_bytes offset = m.DATA_OFFSET @@ -269,7 +269,7 @@ def dump_SDMF_share(m, length, options): # knowing the parent directory name to get it pieces = options['filename'].split(os.sep) if len(pieces) >= 2: - piece = to_str(pieces[-2]) + piece = to_bytes(pieces[-2]) if base32.could_be_base32_encoded(piece): storage_index = base32.a2b(piece) fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey) @@ -307,7 +307,7 @@ def dump_MDMF_share(m, length, options): from allmydata.mutable.layout import MDMFSlotReadProxy from allmydata.util import base32, hashutil from allmydata.uri import MDMFVerifierURI - from allmydata.util.encodingutil import quote_output, to_str + from allmydata.util.encodingutil import quote_output, to_bytes offset = m.DATA_OFFSET out = options.stdout @@ -363,7 +363,7 @@ def dump_MDMF_share(m, length, options): # knowing the parent directory name to get it pieces = options['filename'].split(os.sep) if len(pieces) >= 2: - piece = to_str(pieces[-2]) + piece = to_bytes(pieces[-2]) if base32.could_be_base32_encoded(piece): storage_index = base32.a2b(piece) fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey) diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py index 558c3d6d3..c63558eb1 100644 --- a/src/allmydata/scripts/tahoe_backup.py +++ b/src/allmydata/scripts/tahoe_backup.py @@ -11,7 +11,7 @@ from allmydata.scripts.common_http import do_http, HTTPError, format_http_error from allmydata.util import time_format from allmydata.scripts import backupdb from allmydata.util.encodingutil import listdir_unicode, quote_output, \ - quote_local_unicode_path, to_str, FilenameEncodingError, unicode_to_url + quote_local_unicode_path, to_bytes, FilenameEncodingError, unicode_to_url from allmydata.util.assertutil import precondition from allmydata.util.fileutil import abspath_expanduser_unicode, precondition_abspath @@ -47,7 +47,7 @@ def mkdir(contents, options): if resp.status < 200 or resp.status >= 300: raise HTTPError("Error during mkdir", resp) - dircap = to_str(resp.read().strip()) + dircap = to_bytes(resp.read().strip()) return dircap def put_child(dirurl, childname, childcap): diff --git a/src/allmydata/scripts/tahoe_cp.py b/src/allmydata/scripts/tahoe_cp.py index 5d0849c56..c90dca072 100644 --- a/src/allmydata/scripts/tahoe_cp.py +++ b/src/allmydata/scripts/tahoe_cp.py @@ -13,7 +13,7 @@ from allmydata import uri from allmydata.util import fileutil from allmydata.util.fileutil import abspath_expanduser_unicode, precondition_abspath from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, \ - quote_local_unicode_path, to_str + quote_local_unicode_path, to_bytes from allmydata.util.assertutil import precondition, _assert @@ -254,8 +254,8 @@ class TahoeDirectorySource(object): def init_from_parsed(self, parsed): nodetype, d = parsed - self.writecap = to_str(d.get("rw_uri")) - self.readcap = to_str(d.get("ro_uri")) + self.writecap = to_bytes(d.get("rw_uri")) + self.readcap = to_bytes(d.get("ro_uri")) self.mutable = d.get("mutable", False) # older nodes don't provide it self.children_d = dict( [(unicode(name),value) for (name,value) @@ -270,13 +270,13 @@ class TahoeDirectorySource(object): self.progressfunc("examining %d of %d" % (i+1, len(self.children_d))) if data[0] == "filenode": mutable = data[1].get("mutable", 
False) - writecap = to_str(data[1].get("rw_uri")) - readcap = to_str(data[1].get("ro_uri")) + writecap = to_bytes(data[1].get("rw_uri")) + readcap = to_bytes(data[1].get("ro_uri")) self.children[name] = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name) elif data[0] == "dirnode": - writecap = to_str(data[1].get("rw_uri")) - readcap = to_str(data[1].get("ro_uri")) + writecap = to_bytes(data[1].get("rw_uri")) + readcap = to_bytes(data[1].get("ro_uri")) if writecap and writecap in self.cache: child = self.cache[writecap] elif readcap and readcap in self.cache: @@ -324,8 +324,8 @@ class TahoeDirectoryTarget(object): def init_from_parsed(self, parsed): nodetype, d = parsed - self.writecap = to_str(d.get("rw_uri")) - self.readcap = to_str(d.get("ro_uri")) + self.writecap = to_bytes(d.get("rw_uri")) + self.readcap = to_bytes(d.get("ro_uri")) self.mutable = d.get("mutable", False) # older nodes don't provide it self.children_d = dict( [(unicode(name),value) for (name,value) @@ -365,8 +365,8 @@ class TahoeDirectoryTarget(object): self.progressfunc("examining %d of %d" % (i+1, len(self.children_d))) if data[0] == "filenode": mutable = data[1].get("mutable", False) - writecap = to_str(data[1].get("rw_uri")) - readcap = to_str(data[1].get("ro_uri")) + writecap = to_bytes(data[1].get("rw_uri")) + readcap = to_bytes(data[1].get("ro_uri")) url = None if self.writecap: url = self.nodeurl + "/".join(["uri", @@ -375,8 +375,8 @@ class TahoeDirectoryTarget(object): self.children[name] = TahoeFileTarget(self.nodeurl, mutable, writecap, readcap, url) elif data[0] == "dirnode": - writecap = to_str(data[1].get("rw_uri")) - readcap = to_str(data[1].get("ro_uri")) + writecap = to_bytes(data[1].get("rw_uri")) + readcap = to_bytes(data[1].get("ro_uri")) if writecap and writecap in self.cache: child = self.cache[writecap] elif readcap and readcap in self.cache: @@ -619,8 +619,8 @@ class Copier(object): self.progress) t.init_from_parsed(parsed) else: - writecap = to_str(d.get("rw_uri")) - readcap = to_str(d.get("ro_uri")) + writecap = to_bytes(d.get("rw_uri")) + readcap = to_bytes(d.get("ro_uri")) mutable = d.get("mutable", False) t = TahoeFileTarget(self.nodeurl, mutable, writecap, readcap, url) @@ -682,8 +682,8 @@ class Copier(object): else: if had_trailing_slash: raise FilenameWithTrailingSlashError(source_spec) - writecap = to_str(d.get("rw_uri")) - readcap = to_str(d.get("ro_uri")) + writecap = to_bytes(d.get("rw_uri")) + readcap = to_bytes(d.get("ro_uri")) mutable = d.get("mutable", False) # older nodes don't provide it t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name) return t diff --git a/src/allmydata/scripts/tahoe_ls.py b/src/allmydata/scripts/tahoe_ls.py index 1332da32a..2bfe16d27 100644 --- a/src/allmydata/scripts/tahoe_ls.py +++ b/src/allmydata/scripts/tahoe_ls.py @@ -5,7 +5,7 @@ import json from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error -from allmydata.util.encodingutil import unicode_to_output, quote_output, is_printable_ascii, to_str +from allmydata.util.encodingutil import unicode_to_output, quote_output, is_printable_ascii, to_bytes def list(options): nodeurl = options['node-url'] @@ -94,8 +94,8 @@ def list(options): mtime = child[1].get("metadata", {}).get('tahoe', {}).get("linkmotime") if not mtime: mtime = child[1]["metadata"].get("mtime") - rw_uri = to_str(child[1].get("rw_uri")) - ro_uri = to_str(child[1].get("ro_uri")) + rw_uri = 
to_bytes(child[1].get("rw_uri")) + ro_uri = to_bytes(child[1].get("ro_uri")) if ctime: # match for formatting that GNU 'ls' does if (now - ctime) > 6*30*24*60*60: diff --git a/src/allmydata/scripts/tahoe_mv.py b/src/allmydata/scripts/tahoe_mv.py index 4caaca928..7d13ea72a 100644 --- a/src/allmydata/scripts/tahoe_mv.py +++ b/src/allmydata/scripts/tahoe_mv.py @@ -6,7 +6,7 @@ import json from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error -from allmydata.util.encodingutil import to_str +from allmydata.util.encodingutil import to_bytes # this script is used for both 'mv' and 'ln' @@ -35,7 +35,7 @@ def mv(options, mode="move"): return 1 data = resp.read() nodetype, attrs = json.loads(data) - cap = to_str(attrs.get("rw_uri") or attrs["ro_uri"]) + cap = to_bytes(attrs.get("rw_uri") or attrs["ro_uri"]) # now get the target try: diff --git a/src/allmydata/storage/common.py b/src/allmydata/storage/common.py index 865275bc1..55036eea7 100644 --- a/src/allmydata/storage/common.py +++ b/src/allmydata/storage/common.py @@ -1,3 +1,4 @@ +from future.utils import PY3 import os.path from allmydata.util import base32 @@ -17,5 +18,12 @@ def si_a2b(ascii_storageindex): return base32.a2b(ascii_storageindex) def storage_index_to_dir(storageindex): + """Convert storage index to directory path. + + Returns native string. + """ sia = si_b2a(storageindex) + if PY3: + # On Python 3 we expect paths to be unicode. + sia = sia.decode("ascii") return os.path.join(sia[:2], sia) diff --git a/src/allmydata/storage/crawler.py b/src/allmydata/storage/crawler.py index 14139d81e..24042c38b 100644 --- a/src/allmydata/storage/crawler.py +++ b/src/allmydata/storage/crawler.py @@ -1,3 +1,19 @@ +""" +Crawl the storage server shares. + +Ported to Python 3. +""" + +from __future__ import unicode_literals +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from future.utils import PY2, PY3 +if PY2: + # We don't import bytes, object, dict, and list just in case they're used, + # so as not to create brittle pickles with random magic objects. + from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, range, str, max, min # noqa: F401 import os, time, struct try: @@ -77,6 +93,9 @@ class ShareCrawler(service.MultiService): self.statefile = statefile self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2] for i in range(2**10)] + if PY3: + # On Python 3 we expect the paths to be unicode, not bytes. 
+ self.prefixes = [p.decode("ascii") for p in self.prefixes] self.prefixes.sort() self.timer = None self.bucket_cache = (None, []) @@ -356,7 +375,8 @@ class ShareCrawler(service.MultiService): """ for bucket in buckets: - if bucket <= self.state["last-complete-bucket"]: + last_complete = self.state["last-complete-bucket"] + if last_complete is not None and bucket <= last_complete: continue self.process_bucket(cycle, prefix, prefixdir, bucket) self.state["last-complete-bucket"] = bucket diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index 5e3e01716..4d2dce8c4 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -8,7 +8,7 @@ class LeaseInfo(object): self.cancel_secret = cancel_secret self.expiration_time = expiration_time if nodeid is not None: - assert isinstance(nodeid, str) + assert isinstance(nodeid, bytes) assert len(nodeid) == 20 self.nodeid = nodeid diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 26823957e..3ffb58b68 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -49,7 +49,7 @@ class StorageServer(service.MultiService, Referenceable): expiration_cutoff_date=None, expiration_sharetypes=("mutable", "immutable")): service.MultiService.__init__(self) - assert isinstance(nodeid, str) + assert isinstance(nodeid, bytes) assert len(nodeid) == 20 self.my_nodeid = nodeid self.storedir = storedir diff --git a/src/allmydata/test/cli/test_check.py b/src/allmydata/test/cli/test_check.py index a2476e6c7..85649e262 100644 --- a/src/allmydata/test/cli/test_check.py +++ b/src/allmydata/test/cli/test_check.py @@ -5,7 +5,7 @@ from six.moves import cStringIO as StringIO from allmydata import uri from allmydata.util import base32 -from allmydata.util.encodingutil import quote_output, to_str +from allmydata.util.encodingutil import quote_output, to_bytes from allmydata.mutable.publish import MutableData from allmydata.immutable import upload from allmydata.scripts import debug @@ -41,7 +41,7 @@ class Check(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessReallyEqual(err, "") self.failUnlessReallyEqual(rc, 0) data = json.loads(out) - self.failUnlessReallyEqual(to_str(data["summary"]), "Healthy") + self.failUnlessReallyEqual(to_bytes(data["summary"]), "Healthy") self.failUnlessReallyEqual(data["results"]["healthy"], True) d.addCallback(_check2) diff --git a/src/allmydata/test/cli/test_cp.py b/src/allmydata/test/cli/test_cp.py index 7b076f327..59331029b 100644 --- a/src/allmydata/test/cli/test_cp.py +++ b/src/allmydata/test/cli/test_cp.py @@ -8,7 +8,7 @@ from twisted.internet import defer from allmydata.scripts import cli from allmydata.util import fileutil from allmydata.util.encodingutil import (quote_output, get_io_encoding, - unicode_to_output, to_str) + unicode_to_output, to_bytes) from allmydata.util.assertutil import _assert from ..no_network import GridTestMixin from .common import CLITestMixin @@ -272,9 +272,9 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnless(data['mutable']) self.failUnlessIn("rw_uri", data) - self.rw_uri = to_str(data["rw_uri"]) + self.rw_uri = to_bytes(data["rw_uri"]) self.failUnlessIn("ro_uri", data) - self.ro_uri = to_str(data["ro_uri"]) + self.ro_uri = to_bytes(data["ro_uri"]) d.addCallback(_get_test_txt_uris) # Now make a new file to copy in place of test.txt. 
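(Aside on the to_str -> to_bytes rename that recurs throughout this patch: the renamed TestToFromStr.test_to_bytes() assertions later in this diff are unchanged, so the helper in allmydata.util.encodingutil appears to keep its old behavior. Roughly, it acts like the sketch below; this is an inference from those assertions, not the actual implementation.)

    def to_bytes(s):
        # Illustrative sketch only, inferred from test_to_bytes():
        # None and bytes (even invalid UTF-8) pass through unchanged,
        # while text is encoded as UTF-8.
        if s is None or isinstance(s, bytes):
            return s
        return s.encode("utf-8")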
@@ -306,9 +306,9 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnless(data['mutable']) self.failUnlessIn("ro_uri", data) - self.failUnlessEqual(to_str(data["ro_uri"]), self.ro_uri) + self.failUnlessEqual(to_bytes(data["ro_uri"]), self.ro_uri) self.failUnlessIn("rw_uri", data) - self.failUnlessEqual(to_str(data["rw_uri"]), self.rw_uri) + self.failUnlessEqual(to_bytes(data["rw_uri"]), self.rw_uri) d.addCallback(_check_json) # and, finally, doing a GET directly on one of the old uris @@ -381,7 +381,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failIf(childdata['mutable']) self.failUnlessIn("ro_uri", childdata) uri_key = "ro_uri" - self.childuris[k] = to_str(childdata[uri_key]) + self.childuris[k] = to_bytes(childdata[uri_key]) d.addCallback(_process_directory_json) # Now build a local directory to copy into place, like the following: # test2/ @@ -410,11 +410,11 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): if "mutable" in fn: self.failUnless(data['mutable']) self.failUnlessIn("rw_uri", data) - self.failUnlessEqual(to_str(data["rw_uri"]), self.childuris[fn]) + self.failUnlessEqual(to_bytes(data["rw_uri"]), self.childuris[fn]) else: self.failIf(data['mutable']) self.failUnlessIn("ro_uri", data) - self.failIfEqual(to_str(data["ro_uri"]), self.childuris[fn]) + self.failIfEqual(to_bytes(data["ro_uri"]), self.childuris[fn]) for fn in ("mutable1", "mutable2"): d.addCallback(lambda ignored, fn=fn: @@ -456,7 +456,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessEqual(filetype, "filenode") self.failIf(data['mutable']) self.failUnlessIn("ro_uri", data) - self.failUnlessEqual(to_str(data["ro_uri"]), self.childuris["imm2"]) + self.failUnlessEqual(to_bytes(data["ro_uri"]), self.childuris["imm2"]) d.addCallback(_process_imm2_json) return d @@ -497,7 +497,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessEqual(filetype, "filenode") self.failUnless(data['mutable']) self.failUnlessIn("ro_uri", data) - self._test_read_uri = to_str(data["ro_uri"]) + self._test_read_uri = to_bytes(data["ro_uri"]) d.addCallback(_process_test_json) # Now we'll link the readonly URI into the tahoe: alias. 
d.addCallback(lambda ignored: @@ -521,7 +521,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessEqual(testtype, "filenode") self.failUnless(testdata['mutable']) self.failUnlessIn("ro_uri", testdata) - self.failUnlessEqual(to_str(testdata["ro_uri"]), self._test_read_uri) + self.failUnlessEqual(to_bytes(testdata["ro_uri"]), self._test_read_uri) self.failIfIn("rw_uri", testdata) d.addCallback(_process_tahoe_json) # Okay, now we're going to try uploading another mutable file in @@ -589,7 +589,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessEqual(file2type, "filenode") self.failUnless(file2data['mutable']) self.failUnlessIn("ro_uri", file2data) - self.failUnlessEqual(to_str(file2data["ro_uri"]), self._test_read_uri) + self.failUnlessEqual(to_bytes(file2data["ro_uri"]), self._test_read_uri) self.failIfIn("rw_uri", file2data) d.addCallback(_got_testdir_json) return d diff --git a/src/allmydata/test/common_py3.py b/src/allmydata/test/common_py3.py index 88f52ce8d..0daf66e62 100644 --- a/src/allmydata/test/common_py3.py +++ b/src/allmydata/test/common_py3.py @@ -112,3 +112,26 @@ def skip_if_cannot_represent_filename(u): except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.") + +class Marker(object): + pass + +class FakeCanary(object): + """For use in storage tests. + + Can be moved back to test_storage.py once enough Python 3 porting has been + done. + """ + def __init__(self, ignore_disconnectors=False): + self.ignore = ignore_disconnectors + self.disconnectors = {} + def notifyOnDisconnect(self, f, *args, **kwargs): + if self.ignore: + return + m = Marker() + self.disconnectors[m] = (f, args, kwargs) + return m + def dontNotifyOnDisconnect(self, marker): + if self.ignore: + return + del self.disconnectors[marker] diff --git a/src/allmydata/test/test_crawler.py b/src/allmydata/test/test_crawler.py index 48d1ba26e..1ed217251 100644 --- a/src/allmydata/test/test_crawler.py +++ b/src/allmydata/test/test_crawler.py @@ -1,4 +1,20 @@ +""" +Tests for allmydata.storage.crawler. + +Ported to Python 3. +""" + from __future__ import print_function +from __future__ import division +from __future__ import absolute_import +from __future__ import unicode_literals + +from future.utils import PY2, PY3 +if PY2: + # Don't use future bytes, since it breaks tests. No further work is + # needed; once we're only on Python 3 we'll be deleting these future imports + # anyway, and tests pass just fine on Python 3.
+ from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, str, max, min # noqa: F401 import time import os.path @@ -11,7 +27,7 @@ from allmydata.util import fileutil, hashutil, pollmixin from allmydata.storage.server import StorageServer, si_b2a from allmydata.storage.crawler import ShareCrawler, TimeSliceExceeded -from allmydata.test.test_storage import FakeCanary +from allmydata.test.common_py3 import FakeCanary from allmydata.test.common_util import StallMixin class BucketEnumeratingCrawler(ShareCrawler): @@ -22,6 +38,10 @@ class BucketEnumeratingCrawler(ShareCrawler): self.all_buckets = [] self.finished_d = defer.Deferred() def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): + if PY3: + # Bucket _inputs_ are bytes, and that's what we will compare this + # to: + storage_index_b32 = storage_index_b32.encode("ascii") self.all_buckets.append(storage_index_b32) def finished_cycle(self, cycle): eventually(self.finished_d.callback, None) @@ -36,6 +56,10 @@ class PacedCrawler(ShareCrawler): self.finished_d = defer.Deferred() self.yield_cb = None def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): + if PY3: + # Bucket _inputs_ are bytes, and that's what we will compare this + # to: + storage_index_b32 = storage_index_b32.encode("ascii") self.all_buckets.append(storage_index_b32) self.countdown -= 1 if self.countdown == 0: @@ -92,27 +116,27 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): return self.s.stopService() def si(self, i): - return hashutil.storage_index_hash(str(i)) + return hashutil.storage_index_hash(b"%d" % (i,)) def rs(self, i, serverid): - return hashutil.bucket_renewal_secret_hash(str(i), serverid) + return hashutil.bucket_renewal_secret_hash(b"%d" % (i,), serverid) def cs(self, i, serverid): - return hashutil.bucket_cancel_secret_hash(str(i), serverid) + return hashutil.bucket_cancel_secret_hash(b"%d" % (i,), serverid) def write(self, i, ss, serverid, tail=0): si = self.si(i) - si = si[:-1] + chr(tail) + si = si[:-1] + bytes(bytearray((tail,))) had,made = ss.remote_allocate_buckets(si, self.rs(i, serverid), self.cs(i, serverid), set([0]), 99, FakeCanary()) - made[0].remote_write(0, "data") + made[0].remote_write(0, b"data") made[0].remote_close() return si_b2a(si) def test_immediate(self): self.basedir = "crawler/Basic/immediate" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -141,7 +165,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_service(self): self.basedir = "crawler/Basic/service" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -169,7 +193,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_paced(self): self.basedir = "crawler/Basic/paced" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -271,7 +295,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_paced_service(self): self.basedir = "crawler/Basic/paced_service" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -338,7 +362,7 @@ class Basic(unittest.TestCase, StallMixin, 
pollmixin.PollMixin): self.basedir = "crawler/Basic/cpu_usage" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -383,7 +407,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_empty_subclass(self): self.basedir = "crawler/Basic/empty_subclass" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) @@ -411,7 +435,7 @@ class Basic(unittest.TestCase, StallMixin, pollmixin.PollMixin): def test_oneshot(self): self.basedir = "crawler/Basic/oneshot" fileutil.make_dirs(self.basedir) - serverid = "\x00" * 20 + serverid = b"\x00" * 20 ss = StorageServer(self.basedir, serverid) ss.setServiceParent(self.s) diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index cf72fc9ac..376bd6ec6 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -84,7 +84,7 @@ from allmydata.util import encodingutil, fileutil from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ - get_io_encoding, get_filesystem_encoding, to_str, from_utf8_or_none, _reload, \ + get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ unicode_to_argv from twisted.python import usage @@ -144,6 +144,7 @@ class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): argv_to_unicode, lumiere_nfc.encode('latin1')) + @skipIf(PY3, "Python 2 only.") def test_unicode_to_output(self): encodingutil.io_encoding = 'koi8-r' self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) @@ -228,7 +229,8 @@ class EncodingUtil(ReallyEqualMixin): def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") - def test_unicode_to_output(self): + @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") + def test_unicode_to_output_py2(self): if 'argv' not in dir(self): return @@ -239,6 +241,10 @@ class EncodingUtil(ReallyEqualMixin): _reload() self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv) + @skipIf(PY2, "Python 3 only.") + def test_unicode_to_output_py3(self): + self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) + @skipIf(PY3, "Python 2 only.") def test_unicode_to_argv_py2(self): """unicode_to_argv() converts to bytes on Python 2.""" @@ -349,6 +355,8 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): _reload() def _check(self, inp, out, enc, optional_quotes, quote_newlines): + if PY3 and isinstance(out, bytes): + out = out.decode(enc or encodingutil.io_encoding) out2 = out if optional_quotes: out2 = out2[1:-1] @@ -376,6 +384,9 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): def _test_quote_output_all(self, enc): def check(inp, out, optional_quotes=False, quote_newlines=None): + if PY3: + # Result is always Unicode on Python 3 + out = out.decode("ascii") self._check(inp, out, enc, optional_quotes, quote_newlines) # optional single quotes @@ -438,7 +449,10 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): def test_quote_output_utf8(self, enc='utf-8'): def check(inp, out, optional_quotes=False, quote_newlines=None): - self._check(inp, out.encode('utf-8'), 
enc, optional_quotes, quote_newlines) + if PY2: + # On Python 3 output is always Unicode: + out = out.encode('utf-8') + self._check(inp, out, enc, optional_quotes, quote_newlines) self._test_quote_output_all(enc) check(u"\u2621", u"'\u2621'", True) @@ -463,43 +477,50 @@ def win32_other(win32, other): return win32 if sys.platform == "win32" else other class QuotePaths(ReallyEqualMixin, unittest.TestCase): - def test_quote_path(self): - self.failUnlessReallyEqual(quote_path([u'foo', u'bar']), b"'foo/bar'") - self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'") - self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar") - self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"') - self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"') - self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"') - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"), + def assertPathsEqual(self, actual, expected): + if PY3: + # On Python 3, results should be unicode: + expected = expected.decode("ascii") + self.failUnlessReallyEqual(actual, expected) + + def test_quote_path(self): + self.assertPathsEqual(quote_path([u'foo', u'bar']), b"'foo/bar'") + self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'") + self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar") + self.assertPathsEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"') + self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"') + self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"') + + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"), win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True), + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True), win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False), + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False), win32_other(b"C:\\foo", b"\\\\?\\C:\\foo")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"), + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"), win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True), + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True), win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'")) - self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False), + self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False), win32_other(b"\\\\foo\\bar", b"\\\\?\\UNC\\foo\\bar")) def test_quote_filepath(self): foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar')) - self.failUnlessReallyEqual(quote_filepath(foo_bar_fp), + self.assertPathsEqual(quote_filepath(foo_bar_fp), win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'")) - self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=True), + self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=True), win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'")) - self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=False), + 
self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=False), win32_other(b"C:\\foo\\bar", b"/foo/bar")) if sys.platform == "win32": foo_longfp = FilePath(u'\\\\?\\C:\\foo') - self.failUnlessReallyEqual(quote_filepath(foo_longfp), + self.assertPathsEqual(quote_filepath(foo_longfp), b"'C:\\foo'") - self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=True), + self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=True), b"'C:\\foo'") - self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=False), + self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=False), b"C:\\foo") @@ -600,12 +621,12 @@ class OpenBSD(EncodingUtil, unittest.TestCase): class TestToFromStr(ReallyEqualMixin, unittest.TestCase): - def test_to_str(self): - self.failUnlessReallyEqual(to_str(b"foo"), b"foo") - self.failUnlessReallyEqual(to_str(b"lumi\xc3\xa8re"), b"lumi\xc3\xa8re") - self.failUnlessReallyEqual(to_str(b"\xFF"), b"\xFF") # passes through invalid UTF-8 -- is this what we want? - self.failUnlessReallyEqual(to_str(u"lumi\u00E8re"), b"lumi\xc3\xa8re") - self.failUnlessReallyEqual(to_str(None), None) + def test_to_bytes(self): + self.failUnlessReallyEqual(to_bytes(b"foo"), b"foo") + self.failUnlessReallyEqual(to_bytes(b"lumi\xc3\xa8re"), b"lumi\xc3\xa8re") + self.failUnlessReallyEqual(to_bytes(b"\xFF"), b"\xFF") # passes through invalid UTF-8 -- is this what we want? + self.failUnlessReallyEqual(to_bytes(u"lumi\u00E8re"), b"lumi\xc3\xa8re") + self.failUnlessReallyEqual(to_bytes(None), None) def test_from_utf8_or_none(self): self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo") diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py index ca7f2b0d0..14c342c41 100644 --- a/src/allmydata/test/test_storage.py +++ b/src/allmydata/test/test_storage.py @@ -35,24 +35,8 @@ from allmydata.test.no_network import NoNetworkServer from allmydata.storage_client import ( _StorageServer, ) +from .common_py3 import FakeCanary -class Marker(object): - pass - -class FakeCanary(object): - def __init__(self, ignore_disconnectors=False): - self.ignore = ignore_disconnectors - self.disconnectors = {} - def notifyOnDisconnect(self, f, *args, **kwargs): - if self.ignore: - return - m = Marker() - self.disconnectors[m] = (f, args, kwargs) - return m - def dontNotifyOnDisconnect(self, marker): - if self.ignore: - return - del self.disconnectors[marker] class FakeStatsProvider(object): def count(self, name, delta=1): diff --git a/src/allmydata/test/test_uri.py b/src/allmydata/test/test_uri.py index c04b1259d..f89fae151 100644 --- a/src/allmydata/test/test_uri.py +++ b/src/allmydata/test/test_uri.py @@ -1,3 +1,17 @@ +""" +Tests for allmydata.uri. + +Ported to Python 3. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from future.utils import PY2 +if PY2: + from future.builtins import filter, map, zip, ascii, chr, dict, hex, input, next, oct, open, pow, round, super, bytes, int, list, object, range, str, max, min # noqa: F401 import os from twisted.trial import unittest @@ -40,24 +54,24 @@ class Literal(testutil.ReallyEqualMixin, unittest.TestCase): self.failUnlessReallyEqual(u.get_verify_cap(), None) def test_empty(self): - data = "" # This data is some *very* small data! + data = b"" # This data is some *very* small data! 
return self._help_test(data) def test_pack(self): - data = "This is some small data" + data = b"This is some small data" return self._help_test(data) def test_nonascii(self): - data = "This contains \x00 and URI:LIT: and \n, oh my." + data = b"This contains \x00 and URI:LIT: and \n, oh my." return self._help_test(data) class Compare(testutil.ReallyEqualMixin, unittest.TestCase): def test_compare(self): - lit1 = uri.LiteralFileURI("some data") - fileURI = 'URI:CHK:f5ahxa25t4qkktywz6teyfvcx4:opuioq7tj2y6idzfp6cazehtmgs5fdcebcz3cygrxyydvcozrmeq:3:10:345834' + lit1 = uri.LiteralFileURI(b"some data") + fileURI = b'URI:CHK:f5ahxa25t4qkktywz6teyfvcx4:opuioq7tj2y6idzfp6cazehtmgs5fdcebcz3cygrxyydvcozrmeq:3:10:345834' chk1 = uri.CHKFileURI.init_from_string(fileURI) chk2 = uri.CHKFileURI.init_from_string(fileURI) - unk = uri.UnknownURI("lafs://from_the_future") + unk = uri.UnknownURI(b"lafs://from_the_future") self.failIfEqual(lit1, chk1) self.failUnlessReallyEqual(chk1, chk2) self.failIfEqual(chk1, "not actually a URI") @@ -66,21 +80,24 @@ class Compare(testutil.ReallyEqualMixin, unittest.TestCase): self.failUnlessReallyEqual(len(s), 3) # since chk1==chk2 def test_is_uri(self): - lit1 = uri.LiteralFileURI("some data").to_string() + lit1 = uri.LiteralFileURI(b"some data").to_string() self.failUnless(uri.is_uri(lit1)) self.failIf(uri.is_uri(None)) def test_is_literal_file_uri(self): - lit1 = uri.LiteralFileURI("some data").to_string() + lit1 = uri.LiteralFileURI(b"some data").to_string() self.failUnless(uri.is_literal_file_uri(lit1)) self.failIf(uri.is_literal_file_uri(None)) self.failIf(uri.is_literal_file_uri("foo")) self.failIf(uri.is_literal_file_uri("ro.foo")) - self.failIf(uri.is_literal_file_uri("URI:LITfoo")) + self.failIf(uri.is_literal_file_uri(b"URI:LITfoo")) self.failUnless(uri.is_literal_file_uri("ro.URI:LIT:foo")) self.failUnless(uri.is_literal_file_uri("imm.URI:LIT:foo")) def test_has_uri_prefix(self): + self.failUnless(uri.has_uri_prefix(b"URI:foo")) + self.failUnless(uri.has_uri_prefix(b"ro.URI:foo")) + self.failUnless(uri.has_uri_prefix(b"imm.URI:foo")) self.failUnless(uri.has_uri_prefix("URI:foo")) self.failUnless(uri.has_uri_prefix("ro.URI:foo")) self.failUnless(uri.has_uri_prefix("imm.URI:foo")) @@ -89,9 +106,9 @@ class Compare(testutil.ReallyEqualMixin, unittest.TestCase): class CHKFile(testutil.ReallyEqualMixin, unittest.TestCase): def test_pack(self): - key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + key = b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" storage_index = hashutil.storage_index_hash(key) - uri_extension_hash = hashutil.uri_extension_hash("stuff") + uri_extension_hash = hashutil.uri_extension_hash(b"stuff") needed_shares = 25 total_shares = 100 size = 1234 @@ -138,26 +155,26 @@ class CHKFile(testutil.ReallyEqualMixin, unittest.TestCase): self.failUnlessReallyEqual(u.to_string(), u2imm.to_string()) v = u.get_verify_cap() - self.failUnless(isinstance(v.to_string(), str)) + self.failUnless(isinstance(v.to_string(), bytes)) self.failUnless(v.is_readonly()) self.failIf(v.is_mutable()) v2 = uri.from_string(v.to_string()) self.failUnlessReallyEqual(v, v2) - v3 = uri.CHKFileVerifierURI(storage_index="\x00"*16, - uri_extension_hash="\x00"*32, + v3 = uri.CHKFileVerifierURI(storage_index=b"\x00"*16, + uri_extension_hash=b"\x00"*32, needed_shares=3, total_shares=10, size=1234) - self.failUnless(isinstance(v3.to_string(), str)) + self.failUnless(isinstance(v3.to_string(), bytes)) self.failUnless(v3.is_readonly()) 
self.failIf(v3.is_mutable()) def test_pack_badly(self): - key = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + key = b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" storage_index = hashutil.storage_index_hash(key) - uri_extension_hash = hashutil.uri_extension_hash("stuff") + uri_extension_hash = hashutil.uri_extension_hash(b"stuff") needed_shares = 25 total_shares = 100 size = 1234 @@ -186,35 +203,37 @@ class CHKFile(testutil.ReallyEqualMixin, unittest.TestCase): class Extension(testutil.ReallyEqualMixin, unittest.TestCase): def test_pack(self): - data = {"stuff": "value", - "size": 12, - "needed_shares": 3, - "big_hash": hashutil.tagged_hash("foo", "bar"), + data = {b"stuff": b"value", + b"size": 12, + b"needed_shares": 3, + b"big_hash": hashutil.tagged_hash(b"foo", b"bar"), } ext = uri.pack_extension(data) d = uri.unpack_extension(ext) - self.failUnlessReallyEqual(d["stuff"], "value") - self.failUnlessReallyEqual(d["size"], 12) - self.failUnlessReallyEqual(d["big_hash"], hashutil.tagged_hash("foo", "bar")) + self.failUnlessReallyEqual(d[b"stuff"], b"value") + self.failUnlessReallyEqual(d[b"size"], 12) + self.failUnlessReallyEqual(d[b"big_hash"], hashutil.tagged_hash(b"foo", b"bar")) readable = uri.unpack_extension_readable(ext) - self.failUnlessReallyEqual(readable["needed_shares"], 3) - self.failUnlessReallyEqual(readable["stuff"], "value") - self.failUnlessReallyEqual(readable["size"], 12) - self.failUnlessReallyEqual(readable["big_hash"], - base32.b2a(hashutil.tagged_hash("foo", "bar"))) - self.failUnlessReallyEqual(readable["UEB_hash"], + self.failUnlessReallyEqual(readable[b"needed_shares"], 3) + self.failUnlessReallyEqual(readable[b"stuff"], b"value") + self.failUnlessReallyEqual(readable[b"size"], 12) + self.failUnlessReallyEqual(readable[b"big_hash"], + base32.b2a(hashutil.tagged_hash(b"foo", b"bar"))) + self.failUnlessReallyEqual(readable[b"UEB_hash"], base32.b2a(hashutil.uri_extension_hash(ext))) class Unknown(testutil.ReallyEqualMixin, unittest.TestCase): def test_from_future(self): # any URI type that we don't recognize should be treated as unknown - future_uri = "I am a URI from the future. Whatever you do, don't " + future_uri = b"I am a URI from the future. Whatever you do, don't " u = uri.from_string(future_uri) self.failUnless(isinstance(u, uri.UnknownURI)) self.failUnlessReallyEqual(u.to_string(), future_uri) self.failUnless(u.get_readonly() is None) self.failUnless(u.get_error() is None) + future_uri_unicode = future_uri.decode("utf-8") + self.assertEqual(future_uri, uri.from_string(future_uri_unicode).to_string()) u2 = uri.UnknownURI(future_uri, error=CapConstraintError("...")) self.failUnlessReallyEqual(u.to_string(), future_uri) @@ -222,7 +241,7 @@ class Unknown(testutil.ReallyEqualMixin, unittest.TestCase): self.failUnless(isinstance(u2.get_error(), CapConstraintError)) # Future caps might have non-ASCII chars in them. (Or maybe not, who can tell about the future?) - future_uri = u"I am a cap from the \u263A future. Whatever you ".encode('utf-8') + future_uri = u"I am a cap from the \u263A future. 
Whatever you ".encode("utf-8") u = uri.from_string(future_uri) self.failUnless(isinstance(u, uri.UnknownURI)) self.failUnlessReallyEqual(u.to_string(), future_uri) @@ -236,15 +255,15 @@ class Unknown(testutil.ReallyEqualMixin, unittest.TestCase): class Constraint(testutil.ReallyEqualMixin, unittest.TestCase): def test_constraint(self): - bad = "http://127.0.0.1:3456/uri/URI%3ADIR2%3Agh3l5rbvnv2333mrfvalmjfr4i%3Alz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma/" + bad = b"http://127.0.0.1:3456/uri/URI%3ADIR2%3Agh3l5rbvnv2333mrfvalmjfr4i%3Alz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma/" self.failUnlessRaises(uri.BadURIError, uri.DirectoryURI.init_from_string, bad) - fileURI = 'URI:CHK:gh3l5rbvnv2333mrfvalmjfr4i:lz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma:3:10:345834' + fileURI = b'URI:CHK:gh3l5rbvnv2333mrfvalmjfr4i:lz6l7u3z3b7g37s4zkdmfpx5ly4ib4m6thrpbusi6ys62qtc6mma:3:10:345834' uri.CHKFileURI.init_from_string(fileURI) class Mutable(testutil.ReallyEqualMixin, unittest.TestCase): def setUp(self): - self.writekey = "\x01" * 16 - self.fingerprint = "\x02" * 32 + self.writekey = b"\x01" * 16 + self.fingerprint = b"\x02" * 32 self.readkey = hashutil.ssk_readkey_hash(self.writekey) self.storage_index = hashutil.ssk_storage_index_hash(self.readkey) @@ -410,28 +429,29 @@ class Mutable(testutil.ReallyEqualMixin, unittest.TestCase): u1 = uri.WriteableMDMFFileURI(self.writekey, self.fingerprint) cap = u1.to_string() - cap2 = cap+":I COME FROM THE FUTURE" + cap2 = cap+b":I COME FROM THE FUTURE" u2 = uri.WriteableMDMFFileURI.init_from_string(cap2) self.failUnlessReallyEqual(self.writekey, u2.writekey) self.failUnlessReallyEqual(self.fingerprint, u2.fingerprint) self.failIf(u2.is_readonly()) self.failUnless(u2.is_mutable()) - cap3 = cap+":"+os.urandom(40) # parse *that*! + + cap3 = cap+b":" + os.urandom(40) u3 = uri.WriteableMDMFFileURI.init_from_string(cap3) self.failUnlessReallyEqual(self.writekey, u3.writekey) self.failUnlessReallyEqual(self.fingerprint, u3.fingerprint) self.failIf(u3.is_readonly()) self.failUnless(u3.is_mutable()) - cap4 = u1.get_readonly().to_string()+":ooh scary future stuff" + cap4 = u1.get_readonly().to_string()+b":ooh scary future stuff" u4 = uri.from_string_mutable_filenode(cap4) self.failUnlessReallyEqual(self.readkey, u4.readkey) self.failUnlessReallyEqual(self.fingerprint, u4.fingerprint) self.failUnless(u4.is_readonly()) self.failUnless(u4.is_mutable()) - cap5 = u1.get_verify_cap().to_string()+":spoilers!" + cap5 = u1.get_verify_cap().to_string()+b":spoilers!" 
u5 = uri.from_string(cap5) self.failUnlessReallyEqual(self.storage_index, u5.storage_index) self.failUnlessReallyEqual(self.fingerprint, u5.fingerprint) @@ -468,8 +488,8 @@ class Mutable(testutil.ReallyEqualMixin, unittest.TestCase): class Dirnode(testutil.ReallyEqualMixin, unittest.TestCase): def test_pack(self): - writekey = "\x01" * 16 - fingerprint = "\x02" * 32 + writekey = b"\x01" * 16 + fingerprint = b"\x02" * 32 n = uri.WriteableSSKFileURI(writekey, fingerprint) u1 = uri.DirectoryURI(n) @@ -536,8 +556,8 @@ class Dirnode(testutil.ReallyEqualMixin, unittest.TestCase): u1.get_verify_cap()._filenode_uri) def test_immutable(self): - readkey = "\x01" * 16 - uri_extension_hash = hashutil.uri_extension_hash("stuff") + readkey = b"\x01" * 16 + uri_extension_hash = hashutil.uri_extension_hash(b"stuff") needed_shares = 3 total_shares = 10 size = 1234 @@ -548,7 +568,7 @@ class Dirnode(testutil.ReallyEqualMixin, unittest.TestCase): total_shares=total_shares, size=size) fncap = fnuri.to_string() - self.failUnlessReallyEqual(fncap, "URI:CHK:aeaqcaibaeaqcaibaeaqcaibae:nf3nimquen7aeqm36ekgxomalstenpkvsdmf6fplj7swdatbv5oa:3:10:1234") + self.failUnlessReallyEqual(fncap, b"URI:CHK:aeaqcaibaeaqcaibaeaqcaibae:nf3nimquen7aeqm36ekgxomalstenpkvsdmf6fplj7swdatbv5oa:3:10:1234") u1 = uri.ImmutableDirectoryURI(fnuri) self.failUnless(u1.is_readonly()) self.failIf(u1.is_mutable()) @@ -587,20 +607,20 @@ class Dirnode(testutil.ReallyEqualMixin, unittest.TestCase): self.failUnless(IVerifierURI.providedBy(u2_verifier)) u2vs = u2_verifier.to_string() # URI:DIR2-CHK-Verifier:$key:$ueb:$k:$n:$size - self.failUnless(u2vs.startswith("URI:DIR2-CHK-Verifier:"), u2vs) + self.failUnless(u2vs.startswith(b"URI:DIR2-CHK-Verifier:"), u2vs) u2_verifier_fileuri = u2_verifier.get_filenode_cap() self.failUnless(IVerifierURI.providedBy(u2_verifier_fileuri)) u2vfs = u2_verifier_fileuri.to_string() # URI:CHK-Verifier:$key:$ueb:$k:$n:$size self.failUnlessReallyEqual(u2vfs, fnuri.get_verify_cap().to_string()) - self.failUnlessReallyEqual(u2vs[len("URI:DIR2-"):], u2vfs[len("URI:"):]) + self.failUnlessReallyEqual(u2vs[len(b"URI:DIR2-"):], u2vfs[len(b"URI:"):]) self.failUnless(str(u2_verifier)) def test_literal(self): - u0 = uri.LiteralFileURI("data") + u0 = uri.LiteralFileURI(b"data") u1 = uri.LiteralDirectoryURI(u0) self.failUnless(str(u1)) - self.failUnlessReallyEqual(u1.to_string(), "URI:DIR2-LIT:mrqxiyi") + self.failUnlessReallyEqual(u1.to_string(), b"URI:DIR2-LIT:mrqxiyi") self.failUnless(u1.is_readonly()) self.failIf(u1.is_mutable()) self.failUnless(IURI.providedBy(u1)) @@ -608,11 +628,11 @@ class Dirnode(testutil.ReallyEqualMixin, unittest.TestCase): self.failUnless(IDirnodeURI.providedBy(u1)) self.failUnlessReallyEqual(u1.get_verify_cap(), None) self.failUnlessReallyEqual(u1.get_storage_index(), None) - self.failUnlessReallyEqual(u1.abbrev_si(), "") + self.failUnlessReallyEqual(u1.abbrev_si(), b"") def test_mdmf(self): - writekey = "\x01" * 16 - fingerprint = "\x02" * 32 + writekey = b"\x01" * 16 + fingerprint = b"\x02" * 32 uri1 = uri.WriteableMDMFFileURI(writekey, fingerprint) d1 = uri.MDMFDirectoryURI(uri1) self.failIf(d1.is_readonly()) @@ -635,8 +655,8 @@ class Dirnode(testutil.ReallyEqualMixin, unittest.TestCase): self.failUnlessIsInstance(d3, uri.UnknownURI) def test_mdmf_attenuation(self): - writekey = "\x01" * 16 - fingerprint = "\x02" * 32 + writekey = b"\x01" * 16 + fingerprint = b"\x02" * 32 uri1 = uri.WriteableMDMFFileURI(writekey, fingerprint) d1 = uri.MDMFDirectoryURI(uri1) @@ -676,8 +696,8 @@ class 
Dirnode(testutil.ReallyEqualMixin, unittest.TestCase): def test_mdmf_verifier(self): # I'm not sure what I want to write here yet. - writekey = "\x01" * 16 - fingerprint = "\x02" * 32 + writekey = b"\x01" * 16 + fingerprint = b"\x02" * 32 uri1 = uri.WriteableMDMFFileURI(writekey, fingerprint) d1 = uri.MDMFDirectoryURI(uri1) v1 = d1.get_verify_cap() diff --git a/src/allmydata/test/web/test_grid.py b/src/allmydata/test/web/test_grid.py index 73c354567..2a52d98e5 100644 --- a/src/allmydata/test/web/test_grid.py +++ b/src/allmydata/test/web/test_grid.py @@ -10,7 +10,7 @@ from twisted.web import resource from twisted.trial import unittest from allmydata import uri, dirnode from allmydata.util import base32 -from allmydata.util.encodingutil import to_str +from allmydata.util.encodingutil import to_bytes from allmydata.util.consumer import download_to_data from allmydata.util.netstring import split_netstring from allmydata.unknown import UnknownNode @@ -367,13 +367,13 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi f = data[1]["children"][name] self.failUnlessEqual(f[0], "unknown") if expect_rw_uri: - self.failUnlessReallyEqual(to_str(f[1]["rw_uri"]), unknown_rwcap, data) + self.failUnlessReallyEqual(to_bytes(f[1]["rw_uri"]), unknown_rwcap, data) else: self.failIfIn("rw_uri", f[1]) if immutable: - self.failUnlessReallyEqual(to_str(f[1]["ro_uri"]), unknown_immcap, data) + self.failUnlessReallyEqual(to_bytes(f[1]["ro_uri"]), unknown_immcap, data) else: - self.failUnlessReallyEqual(to_str(f[1]["ro_uri"]), unknown_rocap, data) + self.failUnlessReallyEqual(to_bytes(f[1]["ro_uri"]), unknown_rocap, data) self.failUnlessIn("metadata", f[1]) d.addCallback(_check_directory_json, expect_rw_uri=not immutable) @@ -406,18 +406,18 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi data = json.loads(res) self.failUnlessEqual(data[0], "unknown") if expect_rw_uri: - self.failUnlessReallyEqual(to_str(data[1]["rw_uri"]), unknown_rwcap, data) + self.failUnlessReallyEqual(to_bytes(data[1]["rw_uri"]), unknown_rwcap, data) else: self.failIfIn("rw_uri", data[1]) if immutable: - self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), unknown_immcap, data) + self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), unknown_immcap, data) self.failUnlessReallyEqual(data[1]["mutable"], False) elif expect_rw_uri: - self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), unknown_rocap, data) + self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), unknown_rocap, data) self.failUnlessReallyEqual(data[1]["mutable"], True) else: - self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), unknown_rocap, data) + self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), unknown_rocap, data) self.failIfIn("mutable", data[1]) # TODO: check metadata contents @@ -581,7 +581,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi ll_type, ll_data = listed_children[u"lonely"] self.failUnlessEqual(ll_type, "filenode") self.failIfIn("rw_uri", ll_data) - self.failUnlessReallyEqual(to_str(ll_data["ro_uri"]), lonely_uri) + self.failUnlessReallyEqual(to_bytes(ll_data["ro_uri"]), lonely_uri) d.addCallback(_check_json) return d @@ -643,14 +643,14 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi u0 = units[0] self.failUnlessEqual(u0["path"], []) self.failUnlessEqual(u0["type"], "directory") - self.failUnlessReallyEqual(to_str(u0["cap"]), self.rootnode.get_uri()) + self.failUnlessReallyEqual(to_bytes(u0["cap"]), 
                                    self.rootnode.get_uri())
         u0cr = u0["check-results"]
         self.failUnlessReallyEqual(u0cr["results"]["count-happiness"], 10)
         self.failUnlessReallyEqual(u0cr["results"]["count-shares-good"], 10)
         ugood = [u for u in units if u["type"] == "file" and u["path"] == [u"good"]][0]
-        self.failUnlessReallyEqual(to_str(ugood["cap"]), self.uris["good"])
+        self.failUnlessReallyEqual(to_bytes(ugood["cap"]), self.uris["good"])
         ugoodcr = ugood["check-results"]
         self.failUnlessReallyEqual(ugoodcr["results"]["count-happiness"], 10)
         self.failUnlessReallyEqual(ugoodcr["results"]["count-shares-good"], 10)
@@ -672,7 +672,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
         self.failUnlessEqual(units[-1]["type"], "stats")
         first = units[0]
         self.failUnlessEqual(first["path"], [])
-        self.failUnlessEqual(to_str(first["cap"]), self.rootnode.get_uri())
+        self.failUnlessEqual(to_bytes(first["cap"]), self.rootnode.get_uri())
         self.failUnlessEqual(first["type"], "directory")
         stats = units[-1]["stats"]
         self.failUnlessReallyEqual(stats["count-immutable-files"], 2)
@@ -826,7 +826,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
         u0 = units[0]
         self.failUnlessEqual(u0["path"], [])
         self.failUnlessEqual(u0["type"], "directory")
-        self.failUnlessReallyEqual(to_str(u0["cap"]), self.rootnode.get_uri())
+        self.failUnlessReallyEqual(to_bytes(u0["cap"]), self.rootnode.get_uri())
         u0crr = u0["check-and-repair-results"]
         self.failUnlessReallyEqual(u0crr["repair-attempted"], False)
         self.failUnlessReallyEqual(u0crr["pre-repair-results"]["results"]["count-happiness"], 10)
@@ -834,7 +834,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
         ugood = [u for u in units if u["type"] == "file" and u["path"] == [u"good"]][0]
-        self.failUnlessEqual(to_str(ugood["cap"]), self.uris["good"])
+        self.failUnlessEqual(to_bytes(ugood["cap"]), self.uris["good"])
         ugoodcrr = ugood["check-and-repair-results"]
         self.failUnlessReallyEqual(ugoodcrr["repair-attempted"], False)
         self.failUnlessReallyEqual(ugoodcrr["pre-repair-results"]["results"]["count-happiness"], 10)
@@ -842,7 +842,7 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi
         usick = [u for u in units if u["type"] == "file" and u["path"] == [u"sick"]][0]
-        self.failUnlessReallyEqual(to_str(usick["cap"]), self.uris["sick"])
+        self.failUnlessReallyEqual(to_bytes(usick["cap"]), self.uris["sick"])
         usickcrr = usick["check-and-repair-results"]
         self.failUnlessReallyEqual(usickcrr["repair-attempted"], True)
         self.failUnlessReallyEqual(usickcrr["repair-successful"], True)
diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py
index b4d604ed4..0dc12a4c5 100644
--- a/src/allmydata/test/web/test_web.py
+++ b/src/allmydata/test/web/test_web.py
@@ -36,7 +36,7 @@ from allmydata.nodemaker import NodeMaker
 from allmydata.web.common import WebError, MultiFormatPage
 from allmydata.util import fileutil, base32, hashutil
 from allmydata.util.consumer import download_to_data
-from allmydata.util.encodingutil import to_str
+from allmydata.util.encodingutil import to_bytes
 from ...util.connection_status import ConnectionStatus
 from ..common import (
     EMPTY_CLIENT_CONFIG,
@@ -457,8 +457,8 @@ class WebMixin(TimezoneMixin):
         self.failUnless(isinstance(data[1], dict))
         self.failIf(data[1]["mutable"])
         self.failIfIn("rw_uri", data[1]) # immutable
-        self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), self._bar_txt_uri)
-        self.failUnlessReallyEqual(to_str(data[1]["verify_uri"]),
                                    self._bar_txt_verifycap)
+        self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), self._bar_txt_uri)
+        self.failUnlessReallyEqual(to_bytes(data[1]["verify_uri"]),
                                    self._bar_txt_verifycap)
         self.failUnlessReallyEqual(data[1]["size"], len(self.BAR_CONTENTS))
     def failUnlessIsQuuxJSON(self, res, readonly=False):
@@ -487,9 +487,9 @@ class WebMixin(TimezoneMixin):
         self.failUnless(isinstance(data[1], dict))
         self.failUnless(data[1]["mutable"])
         self.failUnlessIn("rw_uri", data[1]) # mutable
-        self.failUnlessReallyEqual(to_str(data[1]["rw_uri"]), self._foo_uri)
-        self.failUnlessReallyEqual(to_str(data[1]["ro_uri"]), self._foo_readonly_uri)
-        self.failUnlessReallyEqual(to_str(data[1]["verify_uri"]), self._foo_verifycap)
+        self.failUnlessReallyEqual(to_bytes(data[1]["rw_uri"]), self._foo_uri)
+        self.failUnlessReallyEqual(to_bytes(data[1]["ro_uri"]), self._foo_readonly_uri)
+        self.failUnlessReallyEqual(to_bytes(data[1]["verify_uri"]), self._foo_verifycap)
         kidnames = sorted([unicode(n) for n in data[1]["children"]])
         self.failUnlessEqual(kidnames,
@@ -506,19 +506,19 @@ class WebMixin(TimezoneMixin):
         self.failUnlessIn("linkmotime", tahoe_md)
         self.failUnlessEqual(kids[u"bar.txt"][0], "filenode")
         self.failUnlessReallyEqual(kids[u"bar.txt"][1]["size"], len(self.BAR_CONTENTS))
-        self.failUnlessReallyEqual(to_str(kids[u"bar.txt"][1]["ro_uri"]), self._bar_txt_uri)
-        self.failUnlessReallyEqual(to_str(kids[u"bar.txt"][1]["verify_uri"]),
+        self.failUnlessReallyEqual(to_bytes(kids[u"bar.txt"][1]["ro_uri"]), self._bar_txt_uri)
+        self.failUnlessReallyEqual(to_bytes(kids[u"bar.txt"][1]["verify_uri"]),
                                    self._bar_txt_verifycap)
         self.failUnlessIn("metadata", kids[u"bar.txt"][1])
         self.failUnlessIn("tahoe", kids[u"bar.txt"][1]["metadata"])
         self.failUnlessReallyEqual(kids[u"bar.txt"][1]["metadata"]["tahoe"]["linkcrtime"], self._bar_txt_metadata["tahoe"]["linkcrtime"])
-        self.failUnlessReallyEqual(to_str(kids[u"n\u00fc.txt"][1]["ro_uri"]),
+        self.failUnlessReallyEqual(to_bytes(kids[u"n\u00fc.txt"][1]["ro_uri"]),
                                    self._bar_txt_uri)
         self.failUnlessIn("quux.txt", kids)
-        self.failUnlessReallyEqual(to_str(kids[u"quux.txt"][1]["rw_uri"]),
+        self.failUnlessReallyEqual(to_bytes(kids[u"quux.txt"][1]["rw_uri"]),
                                    self._quux_txt_uri)
-        self.failUnlessReallyEqual(to_str(kids[u"quux.txt"][1]["ro_uri"]),
+        self.failUnlessReallyEqual(to_bytes(kids[u"quux.txt"][1]["ro_uri"]),
                                    self._quux_txt_readonly_uri)
     @inlineCallbacks
@@ -2181,7 +2181,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         got = {}
         for (path_list, cap) in data:
             got[tuple(path_list)] = cap
-        self.failUnlessReallyEqual(to_str(got[(u"sub",)]), self._sub_uri)
+        self.failUnlessReallyEqual(to_bytes(got[(u"sub",)]), self._sub_uri)
         self.failUnlessIn((u"sub", u"baz.txt"), got)
         self.failUnlessIn("finished", res)
         self.failUnlessIn("origin", res)
@@ -2266,9 +2266,9 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         self.failUnlessEqual(units[-1]["type"], "stats")
         first = units[0]
         self.failUnlessEqual(first["path"], [])
-        self.failUnlessReallyEqual(to_str(first["cap"]), self._foo_uri)
+        self.failUnlessReallyEqual(to_bytes(first["cap"]), self._foo_uri)
         self.failUnlessEqual(first["type"], "directory")
-        baz = [u for u in units[:-1] if to_str(u["cap"]) == self._baz_file_uri][0]
+        baz = [u for u in units[:-1] if to_bytes(u["cap"]) == self._baz_file_uri][0]
         self.failUnlessEqual(baz["path"], ["sub", "baz.txt"])
         self.failIfEqual(baz["storage-index"], None)
         self.failIfEqual(baz["verifycap"], None)
@@ -2281,14 +2281,14 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
     def test_GET_DIRURL_uri(self):
         d = self.GET(self.public_url + "/foo?t=uri")
         def _check(res):
-            self.failUnlessReallyEqual(to_str(res), self._foo_uri)
+            self.failUnlessReallyEqual(to_bytes(res), self._foo_uri)
         d.addCallback(_check)
         return d
     def test_GET_DIRURL_readonly_uri(self):
         d = self.GET(self.public_url + "/foo?t=readonly-uri")
         def _check(res):
-            self.failUnlessReallyEqual(to_str(res), self._foo_readonly_uri)
+            self.failUnlessReallyEqual(to_bytes(res), self._foo_readonly_uri)
         d.addCallback(_check)
         return d
@@ -2950,9 +2950,9 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
             new_json = children[u"new.txt"]
             self.failUnlessEqual(new_json[0], "filenode")
             self.failUnless(new_json[1]["mutable"])
-            self.failUnlessReallyEqual(to_str(new_json[1]["rw_uri"]), self._mutable_uri)
+            self.failUnlessReallyEqual(to_bytes(new_json[1]["rw_uri"]), self._mutable_uri)
             ro_uri = self._mutable_node.get_readonly().to_string()
-            self.failUnlessReallyEqual(to_str(new_json[1]["ro_uri"]), ro_uri)
+            self.failUnlessReallyEqual(to_bytes(new_json[1]["ro_uri"]), ro_uri)
         d.addCallback(_check_page_json)
         # and the JSON form of the file
@@ -2962,9 +2962,9 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
             parsed = json.loads(res)
             self.failUnlessEqual(parsed[0], "filenode")
             self.failUnless(parsed[1]["mutable"])
-            self.failUnlessReallyEqual(to_str(parsed[1]["rw_uri"]), self._mutable_uri)
+            self.failUnlessReallyEqual(to_bytes(parsed[1]["rw_uri"]), self._mutable_uri)
             ro_uri = self._mutable_node.get_readonly().to_string()
-            self.failUnlessReallyEqual(to_str(parsed[1]["ro_uri"]), ro_uri)
+            self.failUnlessReallyEqual(to_bytes(parsed[1]["ro_uri"]), ro_uri)
         d.addCallback(_check_file_json)
         # and look at t=uri and t=readonly-uri
diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py
index 051b45f79..b601226da 100644
--- a/src/allmydata/uri.py
+++ b/src/allmydata/uri.py
@@ -1,3 +1,22 @@
+"""
+URIs (kinda sorta, really they're capabilities?).
+
+Ported to Python 3.
+
+Methods ending in to_string() are actually to_bytes(), possibly should be fixed
+in follow-up port.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    # Don't import bytes, to prevent leaks.
+    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, dict, list, object, range, str, max, min # noqa: F401
+
+from past.builtins import unicode, long
 import re
@@ -24,10 +43,10 @@ class BadURIError(CapConstraintError):
 # - make variable and method names consistently use _uri for an URI string,
 #   and _cap for a Cap object (decoded URI)
-BASE32STR_128bits = '(%s{25}%s)' % (base32.BASE32CHAR, base32.BASE32CHAR_3bits)
-BASE32STR_256bits = '(%s{51}%s)' % (base32.BASE32CHAR, base32.BASE32CHAR_1bits)
+BASE32STR_128bits = b'(%s{25}%s)' % (base32.BASE32CHAR, base32.BASE32CHAR_3bits)
+BASE32STR_256bits = b'(%s{51}%s)' % (base32.BASE32CHAR, base32.BASE32CHAR_1bits)
-NUMBER='([0-9]+)'
+NUMBER=b'([0-9]+)'
 class _BaseURI(object):
@@ -53,10 +72,10 @@ class _BaseURI(object):
 @implementer(IURI, IImmutableFileURI)
 class CHKFileURI(_BaseURI):
-    BASE_STRING='URI:CHK:'
-    STRING_RE=re.compile('^URI:CHK:'+BASE32STR_128bits+':'+
-                         BASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER+
-                         '$')
+    BASE_STRING=b'URI:CHK:'
+    STRING_RE=re.compile(b'^URI:CHK:'+BASE32STR_128bits+b':'+
+                         BASE32STR_256bits+b':'+NUMBER+b':'+NUMBER+b':'+NUMBER+
+                         b'$')
     def __init__(self, key, uri_extension_hash, needed_shares, total_shares, size):
@@ -82,7 +101,7 @@ class CHKFileURI(_BaseURI):
         assert isinstance(self.total_shares, int)
         assert isinstance(self.size, (int,long))
-        return ('URI:CHK:%s:%s:%d:%d:%d' %
+        return (b'URI:CHK:%s:%s:%d:%d:%d' %
                 (base32.b2a(self.key),
                  base32.b2a(self.uri_extension_hash),
                  self.needed_shares,
@@ -112,9 +131,9 @@ class CHKFileURI(_BaseURI):
 @implementer(IVerifierURI)
 class CHKFileVerifierURI(_BaseURI):
-    BASE_STRING='URI:CHK-Verifier:'
-    STRING_RE=re.compile('^URI:CHK-Verifier:'+BASE32STR_128bits+':'+
-                         BASE32STR_256bits+':'+NUMBER+':'+NUMBER+':'+NUMBER)
+    BASE_STRING=b'URI:CHK-Verifier:'
+    STRING_RE=re.compile(b'^URI:CHK-Verifier:'+BASE32STR_128bits+b':'+
+                         BASE32STR_256bits+b':'+NUMBER+b':'+NUMBER+b':'+NUMBER)
     def __init__(self, storage_index, uri_extension_hash, needed_shares, total_shares, size):
@@ -138,7 +157,7 @@ class CHKFileVerifierURI(_BaseURI):
         assert isinstance(self.total_shares, int)
         assert isinstance(self.size, (int,long))
-        return ('URI:CHK-Verifier:%s:%s:%d:%d:%d' %
+        return (b'URI:CHK-Verifier:%s:%s:%d:%d:%d' %
                 (si_b2a(self.storage_index),
                  base32.b2a(self.uri_extension_hash),
                  self.needed_shares,
@@ -161,12 +180,12 @@ class CHKFileVerifierURI(_BaseURI):
 @implementer(IURI, IImmutableFileURI)
 class LiteralFileURI(_BaseURI):
-    BASE_STRING='URI:LIT:'
-    STRING_RE=re.compile('^URI:LIT:'+base32.BASE32STR_anybytes+'$')
+    BASE_STRING=b'URI:LIT:'
+    STRING_RE=re.compile(b'^URI:LIT:'+base32.BASE32STR_anybytes+b'$')
     def __init__(self, data=None):
         if data is not None:
-            assert isinstance(data, str)
+            assert isinstance(data, bytes)
         self.data = data
     @classmethod
@@ -177,7 +196,7 @@ class LiteralFileURI(_BaseURI):
         return cls(base32.a2b(mo.group(1)))
     def to_string(self):
-        return 'URI:LIT:%s' % base32.b2a(self.data)
+        return b'URI:LIT:%s' % base32.b2a(self.data)
     def is_readonly(self):
         return True
@@ -202,9 +221,9 @@ class LiteralFileURI(_BaseURI):
 @implementer(IURI, IMutableFileURI)
 class WriteableSSKFileURI(_BaseURI):
-    BASE_STRING='URI:SSK:'
-    STRING_RE=re.compile('^'+BASE_STRING+BASE32STR_128bits+':'+
-                         BASE32STR_256bits+'$')
+    BASE_STRING=b'URI:SSK:'
+    STRING_RE=re.compile(b'^'+BASE_STRING+BASE32STR_128bits+b':'+
+                         BASE32STR_256bits+b'$')
     def __init__(self, writekey, fingerprint):
         self.writekey = writekey
@@ -221,10 +240,10 @@ class WriteableSSKFileURI(_BaseURI):
         return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)))
     def to_string(self):
-        assert isinstance(self.writekey, str)
-        assert isinstance(self.fingerprint, str)
-        return 'URI:SSK:%s:%s' % (base32.b2a(self.writekey),
-                                  base32.b2a(self.fingerprint))
+        assert isinstance(self.writekey, bytes)
+        assert isinstance(self.fingerprint, bytes)
+        return b'URI:SSK:%s:%s' % (base32.b2a(self.writekey),
+                                   base32.b2a(self.fingerprint))
     def __repr__(self):
         return "<%s %s>" % (self.__class__.__name__, self.abbrev())
@@ -251,8 +270,8 @@ class WriteableSSKFileURI(_BaseURI):
 @implementer(IURI, IMutableFileURI)
 class ReadonlySSKFileURI(_BaseURI):
-    BASE_STRING='URI:SSK-RO:'
-    STRING_RE=re.compile('^URI:SSK-RO:'+BASE32STR_128bits+':'+BASE32STR_256bits+'$')
+    BASE_STRING=b'URI:SSK-RO:'
+    STRING_RE=re.compile(b'^URI:SSK-RO:'+BASE32STR_128bits+b':'+BASE32STR_256bits+b'$')
     def __init__(self, readkey, fingerprint):
         self.readkey = readkey
@@ -268,10 +287,10 @@ class ReadonlySSKFileURI(_BaseURI):
         return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)))
     def to_string(self):
-        assert isinstance(self.readkey, str)
-        assert isinstance(self.fingerprint, str)
-        return 'URI:SSK-RO:%s:%s' % (base32.b2a(self.readkey),
-                                     base32.b2a(self.fingerprint))
+        assert isinstance(self.readkey, bytes)
+        assert isinstance(self.fingerprint, bytes)
+        return b'URI:SSK-RO:%s:%s' % (base32.b2a(self.readkey),
+                                      base32.b2a(self.fingerprint))
     def __repr__(self):
         return "<%s %s>" % (self.__class__.__name__, self.abbrev())
@@ -298,8 +317,8 @@ class ReadonlySSKFileURI(_BaseURI):
 @implementer(IVerifierURI)
 class SSKVerifierURI(_BaseURI):
-    BASE_STRING='URI:SSK-Verifier:'
-    STRING_RE=re.compile('^'+BASE_STRING+BASE32STR_128bits+':'+BASE32STR_256bits+'$')
+    BASE_STRING=b'URI:SSK-Verifier:'
+    STRING_RE=re.compile(b'^'+BASE_STRING+BASE32STR_128bits+b':'+BASE32STR_256bits+b'$')
     def __init__(self, storage_index, fingerprint):
         assert len(storage_index) == 16
@@ -314,10 +333,10 @@ class SSKVerifierURI(_BaseURI):
         return cls(si_a2b(mo.group(1)), base32.a2b(mo.group(2)))
     def to_string(self):
-        assert isinstance(self.storage_index, str)
-        assert isinstance(self.fingerprint, str)
-        return 'URI:SSK-Verifier:%s:%s' % (si_b2a(self.storage_index),
-                                           base32.b2a(self.fingerprint))
+        assert isinstance(self.storage_index, bytes)
+        assert isinstance(self.fingerprint, bytes)
+        return b'URI:SSK-Verifier:%s:%s' % (si_b2a(self.storage_index),
+                                            base32.b2a(self.fingerprint))
     def is_readonly(self):
         return True
@@ -335,8 +354,8 @@ class SSKVerifierURI(_BaseURI):
 @implementer(IURI, IMutableFileURI)
 class WriteableMDMFFileURI(_BaseURI):
-    BASE_STRING='URI:MDMF:'
-    STRING_RE=re.compile('^'+BASE_STRING+BASE32STR_128bits+':'+BASE32STR_256bits+'(:|$)')
+    BASE_STRING=b'URI:MDMF:'
+    STRING_RE=re.compile(b'^'+BASE_STRING+BASE32STR_128bits+b':'+BASE32STR_256bits+b'(:|$)')
     def __init__(self, writekey, fingerprint):
         self.writekey = writekey
@@ -353,10 +372,10 @@ class WriteableMDMFFileURI(_BaseURI):
         return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)))
     def to_string(self):
-        assert isinstance(self.writekey, str)
-        assert isinstance(self.fingerprint, str)
-        ret = 'URI:MDMF:%s:%s' % (base32.b2a(self.writekey),
-                                  base32.b2a(self.fingerprint))
+        assert isinstance(self.writekey, bytes)
+        assert isinstance(self.fingerprint, bytes)
+        ret = b'URI:MDMF:%s:%s' % (base32.b2a(self.writekey),
+                                   base32.b2a(self.fingerprint))
         return ret
     def __repr__(self):
@@ -384,8 +403,8 @@ class ReadonlyMDMFFileURI(_BaseURI):
-    BASE_STRING='URI:MDMF-RO:'
-    STRING_RE=re.compile('^' +BASE_STRING+BASE32STR_128bits+':'+BASE32STR_256bits+'(:|$)')
+    BASE_STRING=b'URI:MDMF-RO:'
+    STRING_RE=re.compile(b'^' +BASE_STRING+BASE32STR_128bits+b':'+BASE32STR_256bits+b'(:|$)')
     def __init__(self, readkey, fingerprint):
         self.readkey = readkey
@@ -402,10 +421,10 @@ class ReadonlyMDMFFileURI(_BaseURI):
         return cls(base32.a2b(mo.group(1)), base32.a2b(mo.group(2)))
     def to_string(self):
-        assert isinstance(self.readkey, str)
-        assert isinstance(self.fingerprint, str)
-        ret = 'URI:MDMF-RO:%s:%s' % (base32.b2a(self.readkey),
-                                     base32.b2a(self.fingerprint))
+        assert isinstance(self.readkey, bytes)
+        assert isinstance(self.fingerprint, bytes)
+        ret = b'URI:MDMF-RO:%s:%s' % (base32.b2a(self.readkey),
+                                      base32.b2a(self.fingerprint))
         return ret
     def __repr__(self):
@@ -433,8 +452,8 @@ class ReadonlyMDMFFileURI(_BaseURI):
 @implementer(IVerifierURI)
 class MDMFVerifierURI(_BaseURI):
-    BASE_STRING='URI:MDMF-Verifier:'
-    STRING_RE=re.compile('^'+BASE_STRING+BASE32STR_128bits+':'+BASE32STR_256bits+'(:|$)')
+    BASE_STRING=b'URI:MDMF-Verifier:'
+    STRING_RE=re.compile(b'^'+BASE_STRING+BASE32STR_128bits+b':'+BASE32STR_256bits+b'(:|$)')
     def __init__(self, storage_index, fingerprint):
         assert len(storage_index) == 16
@@ -449,10 +468,10 @@ class MDMFVerifierURI(_BaseURI):
         return cls(si_a2b(mo.group(1)), base32.a2b(mo.group(2)))
     def to_string(self):
-        assert isinstance(self.storage_index, str)
-        assert isinstance(self.fingerprint, str)
-        ret = 'URI:MDMF-Verifier:%s:%s' % (si_b2a(self.storage_index),
-                                           base32.b2a(self.fingerprint))
+        assert isinstance(self.storage_index, bytes)
+        assert isinstance(self.fingerprint, bytes)
+        ret = b'URI:MDMF-Verifier:%s:%s' % (si_b2a(self.storage_index),
+                                            base32.b2a(self.fingerprint))
         return ret
     def is_readonly(self):
@@ -494,12 +513,12 @@ class _DirectoryBaseURI(_BaseURI):
         return self.BASE_STRING+bits
     def abbrev(self):
-        return self._filenode_uri.to_string().split(':')[2][:5]
+        return self._filenode_uri.to_string().split(b':')[2][:5]
     def abbrev_si(self):
         si = self._filenode_uri.get_storage_index()
         if si is None:
-            return ""
+            return b""
         return base32.b2a(si)[:5]
     def is_mutable(self):
@@ -518,8 +537,8 @@ class _DirectoryBaseURI(_BaseURI):
 @implementer(IDirectoryURI)
 class DirectoryURI(_DirectoryBaseURI):
-    BASE_STRING='URI:DIR2:'
-    BASE_STRING_RE=re.compile('^'+BASE_STRING)
+    BASE_STRING=b'URI:DIR2:'
+    BASE_STRING_RE=re.compile(b'^'+BASE_STRING)
     INNER_URI_CLASS=WriteableSSKFileURI
     def __init__(self, filenode_uri=None):
@@ -537,8 +556,8 @@ class DirectoryURI(_DirectoryBaseURI):
 @implementer(IReadonlyDirectoryURI)
 class ReadonlyDirectoryURI(_DirectoryBaseURI):
-    BASE_STRING='URI:DIR2-RO:'
-    BASE_STRING_RE=re.compile('^'+BASE_STRING)
+    BASE_STRING=b'URI:DIR2-RO:'
+    BASE_STRING_RE=re.compile(b'^'+BASE_STRING)
     INNER_URI_CLASS=ReadonlySSKFileURI
     def __init__(self, filenode_uri=None):
@@ -571,8 +590,8 @@ class _ImmutableDirectoryBaseURI(_DirectoryBaseURI):
 class ImmutableDirectoryURI(_ImmutableDirectoryBaseURI):
-    BASE_STRING='URI:DIR2-CHK:'
-    BASE_STRING_RE=re.compile('^'+BASE_STRING)
+    BASE_STRING=b'URI:DIR2-CHK:'
+    BASE_STRING_RE=re.compile(b'^'+BASE_STRING)
     INNER_URI_CLASS=CHKFileURI
     def get_verify_cap(self):
@@ -581,8 +600,8 @@ class ImmutableDirectoryURI(_ImmutableDirectoryBaseURI):
 class LiteralDirectoryURI(_ImmutableDirectoryBaseURI):
-    BASE_STRING='URI:DIR2-LIT:'
-    BASE_STRING_RE=re.compile('^'+BASE_STRING)
+    BASE_STRING=b'URI:DIR2-LIT:'
+    BASE_STRING_RE=re.compile(b'^'+BASE_STRING)
     INNER_URI_CLASS=LiteralFileURI
     def get_verify_cap(self):
@@ -593,8 +612,8 @@ class LiteralDirectoryURI(_ImmutableDirectoryBaseURI):
 @implementer(IDirectoryURI)
 class MDMFDirectoryURI(_DirectoryBaseURI):
-    BASE_STRING='URI:DIR2-MDMF:'
-    BASE_STRING_RE=re.compile('^'+BASE_STRING)
+    BASE_STRING=b'URI:DIR2-MDMF:'
+    BASE_STRING_RE=re.compile(b'^'+BASE_STRING)
     INNER_URI_CLASS=WriteableMDMFFileURI
     def __init__(self, filenode_uri=None):
@@ -615,8 +634,8 @@ class MDMFDirectoryURI(_DirectoryBaseURI):
 @implementer(IReadonlyDirectoryURI)
 class ReadonlyMDMFDirectoryURI(_DirectoryBaseURI):
-    BASE_STRING='URI:DIR2-MDMF-RO:'
-    BASE_STRING_RE=re.compile('^'+BASE_STRING)
+    BASE_STRING=b'URI:DIR2-MDMF-RO:'
+    BASE_STRING_RE=re.compile(b'^'+BASE_STRING)
     INNER_URI_CLASS=ReadonlyMDMFFileURI
     def __init__(self, filenode_uri=None):
@@ -653,8 +672,8 @@ def wrap_dirnode_cap(filecap):
 @implementer(IVerifierURI)
 class MDMFDirectoryURIVerifier(_DirectoryBaseURI):
-    BASE_STRING='URI:DIR2-MDMF-Verifier:'
-    BASE_STRING_RE=re.compile('^'+BASE_STRING)
+    BASE_STRING=b'URI:DIR2-MDMF-Verifier:'
+    BASE_STRING_RE=re.compile(b'^'+BASE_STRING)
     INNER_URI_CLASS=MDMFVerifierURI
     def __init__(self, filenode_uri=None):
@@ -678,8 +697,8 @@ class MDMFDirectoryURIVerifier(_DirectoryBaseURI):
 @implementer(IVerifierURI)
 class DirectoryURIVerifier(_DirectoryBaseURI):
-    BASE_STRING='URI:DIR2-Verifier:'
-    BASE_STRING_RE=re.compile('^'+BASE_STRING)
+    BASE_STRING=b'URI:DIR2-Verifier:'
+    BASE_STRING_RE=re.compile(b'^'+BASE_STRING)
     INNER_URI_CLASS=SSKVerifierURI
     def __init__(self, filenode_uri=None):
@@ -702,8 +721,8 @@ class DirectoryURIVerifier(_DirectoryBaseURI):
 @implementer(IVerifierURI)
 class ImmutableDirectoryURIVerifier(DirectoryURIVerifier):
-    BASE_STRING='URI:DIR2-CHK-Verifier:'
-    BASE_STRING_RE=re.compile('^'+BASE_STRING)
+    BASE_STRING=b'URI:DIR2-CHK-Verifier:'
+    BASE_STRING_RE=re.compile(b'^'+BASE_STRING)
     INNER_URI_CLASS=CHKFileVerifierURI
@@ -725,12 +744,15 @@ class UnknownURI(object):
         return None
-ALLEGED_READONLY_PREFIX = 'ro.'
-ALLEGED_IMMUTABLE_PREFIX = 'imm.'
+ALLEGED_READONLY_PREFIX = b'ro.'
+ALLEGED_IMMUTABLE_PREFIX = b'imm.'
 def from_string(u, deep_immutable=False, name=u""):
-    if not isinstance(u, str):
-        raise TypeError("URI must be str: %r" % (u,))
+    """Create URI from either unicode or byte string."""
+    if isinstance(u, unicode):
+        u = u.encode("utf-8")
+    if not isinstance(u, bytes):
+        raise TypeError("URI must be unicode string or bytes: %r" % (u,))
     # We allow and check ALLEGED_READONLY_PREFIX or ALLEGED_IMMUTABLE_PREFIX
     # on all URIs, even though we would only strictly need to do so for caps of
@@ -748,62 +770,62 @@ def from_string(u, deep_immutable=False, name=u""):
     error = None
     try:
-        if s.startswith('URI:CHK:'):
+        if s.startswith(b'URI:CHK:'):
             return CHKFileURI.init_from_string(s)
-        elif s.startswith('URI:CHK-Verifier:'):
+        elif s.startswith(b'URI:CHK-Verifier:'):
             return CHKFileVerifierURI.init_from_string(s)
-        elif s.startswith('URI:LIT:'):
+        elif s.startswith(b'URI:LIT:'):
             return LiteralFileURI.init_from_string(s)
-        elif s.startswith('URI:SSK:'):
+        elif s.startswith(b'URI:SSK:'):
             if can_be_writeable:
                 return WriteableSSKFileURI.init_from_string(s)
             kind = "URI:SSK file writecap"
-        elif s.startswith('URI:SSK-RO:'):
+        elif s.startswith(b'URI:SSK-RO:'):
            if can_be_mutable:
                return ReadonlySSKFileURI.init_from_string(s)
            kind = "URI:SSK-RO readcap to a mutable file"
-        elif s.startswith('URI:SSK-Verifier:'):
+        elif s.startswith(b'URI:SSK-Verifier:'):
            return SSKVerifierURI.init_from_string(s)
-        elif s.startswith('URI:MDMF:'):
+        elif s.startswith(b'URI:MDMF:'):
            if can_be_writeable:
                return WriteableMDMFFileURI.init_from_string(s)
            kind = "URI:MDMF file writecap"
-        elif s.startswith('URI:MDMF-RO:'):
+        elif s.startswith(b'URI:MDMF-RO:'):
            if can_be_mutable:
                return ReadonlyMDMFFileURI.init_from_string(s)
            kind = "URI:MDMF-RO readcap to a mutable file"
-        elif s.startswith('URI:MDMF-Verifier:'):
+        elif s.startswith(b'URI:MDMF-Verifier:'):
            return MDMFVerifierURI.init_from_string(s)
-        elif s.startswith('URI:DIR2:'):
+        elif s.startswith(b'URI:DIR2:'):
            if can_be_writeable:
                return DirectoryURI.init_from_string(s)
            kind = "URI:DIR2 directory writecap"
-        elif s.startswith('URI:DIR2-RO:'):
+        elif s.startswith(b'URI:DIR2-RO:'):
            if can_be_mutable:
                return ReadonlyDirectoryURI.init_from_string(s)
            kind = "URI:DIR2-RO readcap to a mutable directory"
-        elif s.startswith('URI:DIR2-Verifier:'):
+        elif s.startswith(b'URI:DIR2-Verifier:'):
            return DirectoryURIVerifier.init_from_string(s)
-        elif s.startswith('URI:DIR2-CHK:'):
+        elif s.startswith(b'URI:DIR2-CHK:'):
            return ImmutableDirectoryURI.init_from_string(s)
-        elif s.startswith('URI:DIR2-CHK-Verifier:'):
+        elif s.startswith(b'URI:DIR2-CHK-Verifier:'):
            return ImmutableDirectoryURIVerifier.init_from_string(s)
-        elif s.startswith('URI:DIR2-LIT:'):
+        elif s.startswith(b'URI:DIR2-LIT:'):
            return LiteralDirectoryURI.init_from_string(s)
-        elif s.startswith('URI:DIR2-MDMF:'):
+        elif s.startswith(b'URI:DIR2-MDMF:'):
            if can_be_writeable:
                return MDMFDirectoryURI.init_from_string(s)
            kind = "URI:DIR2-MDMF directory writecap"
-        elif s.startswith('URI:DIR2-MDMF-RO:'):
+        elif s.startswith(b'URI:DIR2-MDMF-RO:'):
            if can_be_mutable:
                return ReadonlyMDMFDirectoryURI.init_from_string(s)
            kind = "URI:DIR2-MDMF-RO readcap to a mutable directory"
-        elif s.startswith('URI:DIR2-MDMF-Verifier:'):
+        elif s.startswith(b'URI:DIR2-MDMF-Verifier:'):
            return MDMFDirectoryURIVerifier.init_from_string(s)
-        elif s.startswith('x-tahoe-future-test-writeable:') and not can_be_writeable:
+        elif s.startswith(b'x-tahoe-future-test-writeable:') and not can_be_writeable:
            # For testing how future writeable caps would behave in read-only contexts.
            kind = "x-tahoe-future-test-writeable: testing cap"
-        elif s.startswith('x-tahoe-future-test-mutable:') and not can_be_mutable:
+        elif s.startswith(b'x-tahoe-future-test-mutable:') and not can_be_mutable:
            # For testing how future mutable readcaps would behave in immutable contexts.
            kind = "x-tahoe-future-test-mutable: testing cap"
         else:
@@ -829,18 +851,22 @@ def is_uri(s):
         return False
 def is_literal_file_uri(s):
-    if not isinstance(s, str):
+    if isinstance(s, unicode):
+        s = s.encode("utf-8")
+    if not isinstance(s, bytes):
         return False
-    return (s.startswith('URI:LIT:') or
-            s.startswith(ALLEGED_READONLY_PREFIX + 'URI:LIT:') or
-            s.startswith(ALLEGED_IMMUTABLE_PREFIX + 'URI:LIT:'))
+    return (s.startswith(b'URI:LIT:') or
+            s.startswith(ALLEGED_READONLY_PREFIX + b'URI:LIT:') or
+            s.startswith(ALLEGED_IMMUTABLE_PREFIX + b'URI:LIT:'))
 def has_uri_prefix(s):
-    if not isinstance(s, str):
+    if isinstance(s, unicode):
+        s = s.encode("utf-8")
+    if not isinstance(s, bytes):
         return False
-    return (s.startswith("URI:") or
-            s.startswith(ALLEGED_READONLY_PREFIX + 'URI:') or
-            s.startswith(ALLEGED_IMMUTABLE_PREFIX + 'URI:'))
+    return (s.startswith(b"URI:") or
+            s.startswith(ALLEGED_READONLY_PREFIX + b'URI:') or
+            s.startswith(ALLEGED_IMMUTABLE_PREFIX + b'URI:'))
 # These take the same keyword arguments as from_string above.
@@ -850,26 +876,26 @@ def from_string_dirnode(s, **kwargs):
     _assert(IDirnodeURI.providedBy(u))
     return u
-registerAdapter(from_string_dirnode, str, IDirnodeURI)
+registerAdapter(from_string_dirnode, bytes, IDirnodeURI)
 def from_string_filenode(s, **kwargs):
     u = from_string(s, **kwargs)
     _assert(IFileURI.providedBy(u))
     return u
-registerAdapter(from_string_filenode, str, IFileURI)
+registerAdapter(from_string_filenode, bytes, IFileURI)
 def from_string_mutable_filenode(s, **kwargs):
     u = from_string(s, **kwargs)
     _assert(IMutableFileURI.providedBy(u))
     return u
-registerAdapter(from_string_mutable_filenode, str, IMutableFileURI)
+registerAdapter(from_string_mutable_filenode, bytes, IMutableFileURI)
 def from_string_verifier(s, **kwargs):
     u = from_string(s, **kwargs)
     _assert(IVerifierURI.providedBy(u))
     return u
-registerAdapter(from_string_verifier, str, IVerifierURI)
+registerAdapter(from_string_verifier, bytes, IVerifierURI)
 def pack_extension(data):
@@ -877,34 +903,36 @@ def pack_extension(data):
     for k in sorted(data.keys()):
         value = data[k]
         if isinstance(value, (int, long)):
-            value = "%d" % value
-        assert isinstance(value, str), k
-        assert re.match(r'^[a-zA-Z_\-]+$', k)
-        pieces.append(k + ':' + hashutil.netstring(value))
-    uri_extension = ''.join(pieces)
+            value = b"%d" % value
+        if isinstance(k, unicode):
+            k = k.encode("utf-8")
+        assert isinstance(value, bytes), k
+        assert re.match(br'^[a-zA-Z_\-]+$', k)
+        pieces.append(k + b':' + hashutil.netstring(value))
+    uri_extension = b''.join(pieces)
     return uri_extension
 def unpack_extension(data):
     d = {}
     while data:
-        colon = data.index(':')
+        colon = data.index(b':')
         key = data[:colon]
         data = data[colon+1:]
-        colon = data.index(':')
+        colon = data.index(b':')
         number = data[:colon]
         length = int(number)
         data = data[colon+1:]
         value = data[:length]
-        assert data[length] == ','
+        assert data[length:length+1] == b','
         data = data[length+1:]
         d[key] = value
     # convert certain things to numbers
-    for intkey in ('size', 'segment_size', 'num_segments',
-                   'needed_shares', 'total_shares'):
+    for intkey in (b'size', b'segment_size', b'num_segments',
+                   b'needed_shares', b'total_shares'):
         if intkey in d:
            d[intkey] = int(d[intkey])
    return d
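The `pack_extension`/`unpack_extension` hunks above keep the existing `key:netstring(value)` wire format for the URI extension block but build and parse it as bytes on both Pythons. As a rough illustration (a sketch, not part of the patch; it assumes a Tahoe-LAFS checkout is importable and uses made-up field values), the round trip now looks like this:

```python
# Illustrative sketch of the bytes-only extension-block round trip.
from allmydata.uri import pack_extension, unpack_extension

ueb = pack_extension({b"codec_name": b"zfec", b"needed_shares": 3, b"size": 1234})
# Keys are emitted in sorted order with netstring-encoded values, roughly:
#   b'codec_name:4:zfec,needed_shares:1:3,size:4:1234,'
parsed = unpack_extension(ueb)
assert parsed[b"size"] == 1234            # known integer fields are converted back
assert parsed[b"codec_name"] == b"zfec"   # everything else stays bytes
```

Note that the `data[length:length+1] == b','` terminator check is written as a slice rather than an index: indexing a bytes object on Python 3 yields an int, so the old `data[length] == ','` comparison would no longer match there.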
@@ -912,9 +940,9 @@ def unpack_extension(data):
 def unpack_extension_readable(data):
     unpacked = unpack_extension(data)
-    unpacked["UEB_hash"] = hashutil.uri_extension_hash(data)
+    unpacked[b"UEB_hash"] = hashutil.uri_extension_hash(data)
     for k in sorted(unpacked.keys()):
-        if 'hash' in k:
+        if b'hash' in k:
             unpacked[k] = base32.b2a(unpacked[k])
     return unpacked
diff --git a/src/allmydata/util/_python3.py b/src/allmydata/util/_python3.py
index a87874f1d..0ab63140f 100644
--- a/src/allmydata/util/_python3.py
+++ b/src/allmydata/util/_python3.py
@@ -20,7 +20,7 @@ from __future__ import print_function
 from future.utils import PY2
 if PY2:
-    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
+    from future.builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, str, max, min # noqa: F401
 # Keep these sorted alphabetically, to reduce merge conflicts:
 PORTED_MODULES = [
@@ -32,7 +32,9 @@ PORTED_MODULES = [
     "allmydata.crypto.util",
     "allmydata.hashtree",
     "allmydata.immutable.happiness_upload",
+    "allmydata.storage.crawler",
     "allmydata.test.common_py3",
+    "allmydata.uri",
     "allmydata.util._python3",
     "allmydata.util.abbreviate",
     "allmydata.util.assertutil",
@@ -67,6 +69,7 @@ PORTED_TEST_MODULES = [
     "allmydata.test.test_base62",
     "allmydata.test.test_configutil",
     "allmydata.test.test_connection_status",
+    "allmydata.test.test_crawler",
     "allmydata.test.test_crypto",
     "allmydata.test.test_deferredutil",
     "allmydata.test.test_dictutil",
@@ -84,6 +87,7 @@ PORTED_TEST_MODULES = [
     "allmydata.test.test_spans",
     "allmydata.test.test_statistics",
     "allmydata.test.test_time_format",
+    "allmydata.test.test_uri",
     "allmydata.test.test_util",
     "allmydata.test.test_version",
 ]
diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py
index 07416cc93..17a7a2f38 100644
--- a/src/allmydata/util/encodingutil.py
+++ b/src/allmydata/util/encodingutil.py
@@ -21,6 +21,7 @@ from past.builtins import unicode
 import sys, os, re, locale
 import unicodedata
+import warnings
 from allmydata.util.assertutil import precondition, _assert
 from twisted.python import usage
@@ -83,9 +84,12 @@ def _reload():
     # Despite the Unicode-mode FilePath support added to Twisted in
     # , we can't yet use
-    # Unicode-mode FilePaths with INotify on non-Windows platforms
-    # due to . Supposedly
-    # 7928 is fixed, though...
+    # Unicode-mode FilePaths with INotify on non-Windows platforms due to
+    # . Supposedly 7928 is fixed,
+    # though... and Tahoe-LAFS doesn't use inotify anymore!
+    #
+    # In the interest of not breaking anything, this logic is unchanged for
+    # Python 2, but on Python 3 the paths are always unicode, like it or not.
     use_unicode_filepath = PY3 or sys.platform == "win32"
 _reload()
@@ -140,6 +144,8 @@ def unicode_to_argv(s, mangle=False):
     """
     precondition(isinstance(s, unicode), s)
     if PY3:
+        warnings.warn("This will be unnecessary once Python 2 is dropped.",
+                      DeprecationWarning)
         return s
     if mangle and sys.platform == "win32":
@@ -155,11 +161,15 @@ def unicode_to_url(s):
     # According to RFC 2718, non-ascii characters in URLs must be UTF-8 encoded.
     # FIXME
-    return to_str(s)
+    return to_bytes(s)
     #precondition(isinstance(s, unicode), s)
     #return s.encode('utf-8')
-def to_str(s): # TODO rename to to_bytes
+def to_bytes(s):
+    """Convert unicode to bytes.
+
+    None and bytes are passed through unchanged.
+    """
     if s is None or isinstance(s, bytes):
         return s
     return s.encode('utf-8')
@@ -179,8 +189,15 @@ def is_printable_ascii(s):
 def unicode_to_output(s):
     """
     Encode an unicode object for representation on stdout or stderr.
+
+    On Python 3 just returns the unicode string unchanged, since encoding is
+    the responsibility of stdout/stderr, which expect Unicode by default.
     """
     precondition(isinstance(s, unicode), s)
+    if PY3:
+        warnings.warn("This will be unnecessary once Python 2 is dropped.",
+                      DeprecationWarning)
+        return s
     try:
         out = s.encode(io_encoding)
@@ -213,7 +230,7 @@ def _unicode_escape(m, quote_newlines):
     else:
         return u'\\x%02x' % (codepoint,)
-def _str_escape(m, quote_newlines): # TODO rename to _bytes_escape
+def _bytes_escape(m, quote_newlines):
     """
     Takes a re match on bytes, the result is escaped bytes of group(0).
     """
@@ -248,33 +265,53 @@ def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None):
     Python-compatible backslash escaping is used.  If not explicitly given,
     quote_newlines is True when quotemarks is True.
+
+    On Python 3, returns Unicode strings.
     """
     precondition(isinstance(s, (bytes, unicode)), s)
+    encoding = encoding or io_encoding
+
     if quote_newlines is None:
         quote_newlines = quotemarks
-    if isinstance(s, bytes):
-        try:
-            s = s.decode('utf-8')
-        except UnicodeDecodeError:
-            return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _str_escape(m, quote_newlines), s),)
+    def _encode(s):
+        if isinstance(s, bytes):
+            try:
+                s = s.decode('utf-8')
+            except UnicodeDecodeError:
+                return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _bytes_escape(m, quote_newlines), s),)
-    must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE
-    if must_double_quote.search(s) is None:
-        try:
-            out = s.encode(encoding or io_encoding)
-            if quotemarks or out.startswith(b'"'):
-                return b"'%s'" % (out,)
-            else:
-                return out
-        except (UnicodeDecodeError, UnicodeEncodeError):
-            pass
+        must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE
+        if must_double_quote.search(s) is None:
+            try:
+                out = s.encode(encoding)
+                if quotemarks or out.startswith(b'"'):
+                    return b"'%s'" % (out,)
+                else:
+                    return out
+            except (UnicodeDecodeError, UnicodeEncodeError):
+                pass
+
+        escaped = ESCAPABLE_UNICODE.sub(lambda m: _unicode_escape(m, quote_newlines), s)
+        return b'"%s"' % (escaped.encode(encoding, 'backslashreplace'),)
+
+    result = _encode(s)
+    if PY3:
+        # On Python 3 half of what this function does is unnecessary, since
+        # sys.stdout typically expects Unicode. To ensure no encode errors, one
+        # can do:
+        #
+        # sys.stdout.reconfigure(encoding=sys.stdout.encoding, errors="backslashreplace")
+        #
+        # Although the problem is that doesn't work in Python 3.6, only 3.7 or
+        # later... For now not thinking about it, just returning unicode since
+        # that is the right thing to do on Python 3.
+        result = result.decode(encoding)
+    return result
-    escaped = ESCAPABLE_UNICODE.sub(lambda m: _unicode_escape(m, quote_newlines), s)
-    return b'"%s"' % (escaped.encode(encoding or io_encoding, 'backslashreplace'),)
 def quote_path(path, quotemarks=True):
-    return quote_output(b"/".join(map(to_str, path)), quotemarks=quotemarks, quote_newlines=True)
+    return quote_output(b"/".join(map(to_bytes, path)), quotemarks=quotemarks, quote_newlines=True)
 def quote_local_unicode_path(path, quotemarks=True):
     precondition(isinstance(path, unicode), path)
diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py
index a930fd2b1..788557480 100644
--- a/src/allmydata/web/common.py
+++ b/src/allmydata/web/common.py
@@ -18,7 +18,7 @@ from allmydata.mutable.common import UnrecoverableFileError
 from allmydata.util import abbreviate
 from allmydata.util.hashutil import timing_safe_compare
 from allmydata.util.time_format import format_time, format_delta
-from allmydata.util.encodingutil import to_str, quote_output
+from allmydata.util.encodingutil import to_bytes, quote_output
 def get_filenode_metadata(filenode):
@@ -133,8 +133,8 @@ def convert_children_json(nodemaker, children_json):
     data = json.loads(children_json)
     for (namex, (ctype, propdict)) in data.iteritems():
         namex = unicode(namex)
-        writecap = to_str(propdict.get("rw_uri"))
-        readcap = to_str(propdict.get("ro_uri"))
+        writecap = to_bytes(propdict.get("rw_uri"))
+        readcap = to_bytes(propdict.get("ro_uri"))
         metadata = propdict.get("metadata", {})
         # name= argument is just for error reporting
         childnode = nodemaker.create_from_cap(writecap, readcap, name=namex)
diff --git a/src/allmydata/web/directory.py b/src/allmydata/web/directory.py
index a5ba3bed5..9fdecbcb4 100644
--- a/src/allmydata/web/directory.py
+++ b/src/allmydata/web/directory.py
@@ -22,7 +22,7 @@ from twisted.python.filepath import FilePath
 from allmydata.util import base32
 from allmydata.util.encodingutil import (
-    to_str,
+    to_bytes,
     quote_output,
 )
 from allmydata.uri import (
@@ -484,7 +484,7 @@ class DirectoryNodeHandler(ReplaceMeMixin, Resource, object):
             to_dir = to_dir.decode(charset)
             assert isinstance(to_dir, unicode)
             to_path = to_dir.split(u"/")
-            to_root = self.client.nodemaker.create_from_cap(to_str(to_path[0]))
+            to_root = self.client.nodemaker.create_from_cap(to_bytes(to_path[0]))
             if not IDirectoryNode.providedBy(to_root):
                 raise WebError("to_dir is not a directory", http.BAD_REQUEST)
             d = to_root.get_child_at_path(to_path[1:])
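A brief usage note on the renamed helper (an illustrative sketch, not part of the patch, assuming a Tahoe-LAFS checkout is importable): `to_bytes` keeps the old `to_str` contract, which is what lets callers such as `convert_children_json` and the `to_dir` handling above hand it values that may be `None`, text, or already-encoded bytes.

```python
# Sketch of the to_bytes() contract the web-layer callers rely on.
# The cap strings are shortened placeholders, not real Tahoe caps.
from allmydata.util.encodingutil import to_bytes

assert to_bytes(None) is None                                    # missing rw_uri/ro_uri stays None
assert to_bytes(b"URI:DIR2:aaaa:bbbb") == b"URI:DIR2:aaaa:bbbb"  # bytes pass through unchanged
assert to_bytes(u"URI:DIR2:aaaa:bbbb") == b"URI:DIR2:aaaa:bbbb"  # unicode is encoded as UTF-8
```

Since `uri.from_string` now performs the same unicode-to-UTF-8 normalization itself, code that goes through it accepts either form as well.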