Merge remote-tracking branch 'origin/master' into 3351.spans-python-3

2025-02-20 17:52:50 +00:00 · 2020-07-22 09:57:08 -04:00 · 2020-07-22 09:57:08 -04:00 · 2391bd9733
commit 2391bd9733
parent e9eb93468f bca0b17e80
13 changed files with 202 additions and 234 deletions
--- a/misc/python3/ratchet-passing
+++ b/misc/python3/ratchet-passing
@@ -1,4 +1,19 @@
 allmydata.test.mutable.test_exceptions.Exceptions.test_repr
+allmydata.test.test_base32.Base32.test_a2b
+allmydata.test.test_base32.Base32.test_a2b_b2a_match_Pythons
+allmydata.test.test_base32.Base32.test_b2a
+allmydata.test.test_base32.Base32.test_b2a_or_none
+allmydata.test.test_base62.Base62.test_ende_0x00
+allmydata.test.test_base62.Base62.test_ende_0x000000
+allmydata.test.test_base62.Base62.test_ende_0x01
+allmydata.test.test_base62.Base62.test_ende_0x0100
+allmydata.test.test_base62.Base62.test_ende_0x010000
+allmydata.test.test_base62.Base62.test_ende_longrandstr
+allmydata.test.test_base62.Base62.test_ende_randstr
+allmydata.test.test_base62.Base62.test_known_values
+allmydata.test.test_base62.Base62.test_num_octets_that_encode_to_this_many_chars
+allmydata.test.test_base62.Base62.test_odd_sizes
+allmydata.test.test_base62.Base62.test_roundtrip
 allmydata.test.test_deferredutil.DeferredUtilTests.test_failure
 allmydata.test.test_deferredutil.DeferredUtilTests.test_gather_results
 allmydata.test.test_deferredutil.DeferredUtilTests.test_success
--- a/newsfragments/3325.minor
+++ b/newsfragments/3325.minor
--- a/newsfragments/3329.minor
+++ b/newsfragments/3329.minor
--- a/newsfragments/3339.minor
+++ b/newsfragments/3339.minor
--- a/newsfragments/3340.minor
+++ b/newsfragments/3340.minor
--- a/newsfragments/3341.minor
+++ b/newsfragments/3341.minor
--- a/newsfragments/3342.minor
+++ b/newsfragments/3342.minor
--- a/newsfragments/3343.minor
+++ b/newsfragments/3343.minor
--- a/src/allmydata/test/test_base32.py
+++ b/src/allmydata/test/test_base32.py
@@ -1,6 +1,16 @@
 """
 Tests for allmydata.util.base32.
+
+Ported to Python 3.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401

 import base64

@@ -22,7 +32,9 @@ class Base32(unittest.TestCase):
        self.failUnlessEqual(encoded, x)
        self.assertIsInstance(encoded, bytes)
        self.assertTrue(base32.could_be_base32_encoded(encoded))
-        self.assertEqual(base32.a2b(encoded), input_bytes)
+        decoded = base32.a2b(encoded)
+        self.assertEqual(decoded, input_bytes)
+        self.assertIsInstance(decoded, bytes)

    def test_b2a(self):
        self.failUnlessEqual(base32.b2a(b"\x12\x34"), b"ci2a")
--- a/src/allmydata/test/test_base62.py
+++ b/src/allmydata/test/test_base62.py
@@ -1,7 +1,22 @@
-import random, unittest
+"""
+Tests for allmydata.util.base62.
+
+Ported to Python 3.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401

 from past.builtins import chr as byteschr

+import random, unittest
+
 from hypothesis import (
    strategies as st,
    given,
@@ -10,20 +25,41 @@ from hypothesis import (
 from allmydata.util import base62, mathutil

 def insecurerandstr(n):
-    return b''.join(map(byteschr, map(random.randrange, [0]*n, [256]*n)))
+    return bytes(list(map(random.randrange, [0]*n, [256]*n)))

-class T(unittest.TestCase):
+class Base62(unittest.TestCase):
    def _test_num_octets_that_encode_to_this_many_chars(self, chars, octets):
        assert base62.num_octets_that_encode_to_this_many_chars(chars) == octets, "%s != %s <- %s" % (octets, base62.num_octets_that_encode_to_this_many_chars(chars), chars)

-    def _test_ende(self, bs):
-        ascii=base62.b2a(bs)
-        bs2=base62.a2b(ascii)
-        assert bs2 == bs, "bs2: %s:%s, bs: %s:%s, ascii: %s:%s" % (len(bs2), repr(bs2), len(bs), repr(bs), len(ascii), repr(ascii))
+    def _test_roundtrip(self, bs):
+        encoded = base62.b2a(bs)
+        decoded = base62.a2b(encoded)
+        self.assertEqual(decoded, bs)
+        self.assertIsInstance(encoded, bytes)
+        self.assertIsInstance(bs, bytes)
+        self.assertIsInstance(decoded, bytes)
+        # Encoded string only uses values from the base62 allowed characters:
+        self.assertFalse(set(encoded) - set(base62.chars))

    @given(input_bytes=st.binary(max_size=100))
    def test_roundtrip(self, input_bytes):
-        self._test_ende(input_bytes)
+        self._test_roundtrip(input_bytes)
+
+    def test_known_values(self):
+        """Known values to ensure the algorithm hasn't changed."""
+
+        def check_expected(plaintext, encoded):
+            result1 = base62.b2a(plaintext)
+            self.assertEqual(encoded, result1)
+            result2 = base62.a2b(encoded)
+            self.assertEqual(plaintext, result2)
+
+        check_expected(b"hello", b'7tQLFHz')
+        check_expected(b"", b'0')
+        check_expected(b"zzz", b'0Xg7e')
+        check_expected(b"\x36\xffWAT", b'49pq4mq')
+        check_expected(b"1234 22323", b'1A0afZe9mxSZpz')
+        check_expected(b"______", b'0TmAuCHJX')

    def test_num_octets_that_encode_to_this_many_chars(self):
        return self._test_num_octets_that_encode_to_this_many_chars(2, 1)
@@ -32,25 +68,25 @@ class T(unittest.TestCase):
        return self._test_num_octets_that_encode_to_this_many_chars(6, 4)

    def test_ende_0x00(self):
-        return self._test_ende(b'\x00')
+        return self._test_roundtrip(b'\x00')

    def test_ende_0x01(self):
-        return self._test_ende(b'\x01')
+        return self._test_roundtrip(b'\x01')

    def test_ende_0x0100(self):
-        return self._test_ende(b'\x01\x00')
+        return self._test_roundtrip(b'\x01\x00')

    def test_ende_0x000000(self):
-        return self._test_ende(b'\x00\x00\x00')
+        return self._test_roundtrip(b'\x00\x00\x00')

    def test_ende_0x010000(self):
-        return self._test_ende(b'\x01\x00\x00')
+        return self._test_roundtrip(b'\x01\x00\x00')

    def test_ende_randstr(self):
-        return self._test_ende(insecurerandstr(2**4))
+        return self._test_roundtrip(insecurerandstr(2**4))

    def test_ende_longrandstr(self):
-        return self._test_ende(insecurerandstr(random.randrange(0, 2**10)))
+        return self._test_roundtrip(insecurerandstr(random.randrange(0, 2**10)))

    def test_odd_sizes(self):
        for j in range(2**6):
@@ -59,19 +95,12 @@ class T(unittest.TestCase):
            bs = insecurerandstr(numos)
            # zero-out unused least-sig bits
            if lib%8:
-                b=ord(bs[-1])
+                b = ord(bs[-1:])
                b = b >> (8 - (lib%8))
                b = b << (8 - (lib%8))
-                bs = bs[:-1] + chr(b)
+                bs = bs[:-1] + byteschr(b)
            asl = base62.b2a_l(bs, lib)
            assert len(asl) == base62.num_chars_that_this_many_octets_encode_to(numos) # the size of the base-62 encoding must be just right
            bs2l = base62.a2b_l(asl, lib)
            assert len(bs2l) == numos # the size of the result must be just right
            assert bs == bs2l
-
-def suite():
-    suite = unittest.makeSuite(T, 'test')
-    return suite
-
-if __name__ == "__main__":
-    unittest.main()
--- a/src/allmydata/util/_python3.py
+++ b/src/allmydata/util/_python3.py
@@ -16,6 +16,8 @@ if PY2:
 # Keep these sorted alphabetically, to reduce merge conflicts:
 PORTED_MODULES = [
    "allmydata.util.assertutil",
+    "allmydata.util.base32",
+    "allmydata.util.base62",
    "allmydata.util.deferredutil",
    "allmydata.util.humanreadable",
    "allmydata.util.mathutil",
@@ -26,8 +28,15 @@ PORTED_MODULES = [
 ]

 PORTED_TEST_MODULES = [
+    "allmydata.test.test_base32",
+    "allmydata.test.test_base62",
    "allmydata.test.test_deferredutil",
    "allmydata.test.test_humanreadable",
    "allmydata.test.test_python3",
    "allmydata.test.test_spans",
 ]
+
+
+if __name__ == '__main__':
+    from subprocess import check_call
+    check_call(["trial"] + PORTED_TEST_MODULES)
--- a/src/allmydata/util/base32.py
+++ b/src/allmydata/util/base32.py
@ -1,17 +1,42 @@
-# from the Python Standard Library
-import six
-import string
+"""
+Base32 encoding.
+
+Ported to Python 3.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401
+
+if PY2:
+    def backwardscompat_bytes(b):
+        """
+        Replace Future bytes with native Python 2 bytes, so % works
+        consistently until other modules are ported.
+        """
+        return getattr(b, "__native__", lambda: b)()
+    import string
+    maketrans = string.maketrans
+else:
+    def backwardscompat_bytes(b):
+        return b
+    maketrans = bytes.maketrans
+
+import base64

 from allmydata.util.assertutil import precondition

-z_base_32_alphabet = "ybndrfg8ejkmcpqxot1uwisza345h769" # Zooko's choice, rationale in "DESIGN" doc
-rfc3548_alphabet = "abcdefghijklmnopqrstuvwxyz234567" # RFC3548 standard used by Gnutella, Content-Addressable Web, THEX, Bitzi, Web-Calculus...
+rfc3548_alphabet = b"abcdefghijklmnopqrstuvwxyz234567" # RFC3548 standard used by Gnutella, Content-Addressable Web, THEX, Bitzi, Web-Calculus...
 chars = rfc3548_alphabet

-vals = ''.join(map(chr, range(32)))
-c2vtranstable = string.maketrans(chars, vals)
-v2ctranstable = string.maketrans(vals, chars)
-identitytranstable = string.maketrans('', '')
+vals = backwardscompat_bytes(bytes(range(32)))
+c2vtranstable = maketrans(chars, vals)
+v2ctranstable = maketrans(vals, chars)
+identitytranstable = maketrans(b'', b'')

 def _get_trailing_chars_without_lsbs(N, d):
    """
@@ -22,9 +47,9 @@ def _get_trailing_chars_without_lsbs(N, d):
        s.extend(_get_trailing_chars_without_lsbs(N+1, d=d))
    i = 0
    while i < len(chars):
-        if not d.has_key(i):
+        if i not in d:
            d[i] = None
-            s.append(chars[i])
+            s.append(chars[i:i+1])
        i = i + 2**N
    return s

@@ -33,103 +58,31 @@ def get_trailing_chars_without_lsbs(N):
    if N == 0:
        return chars
    d = {}
-    return ''.join(_get_trailing_chars_without_lsbs(N, d=d))
+    return b''.join(_get_trailing_chars_without_lsbs(N, d=d))

-BASE32CHAR = '['+get_trailing_chars_without_lsbs(0)+']'
-BASE32CHAR_4bits = '['+get_trailing_chars_without_lsbs(1)+']'
-BASE32CHAR_3bits = '['+get_trailing_chars_without_lsbs(2)+']'
-BASE32CHAR_2bits = '['+get_trailing_chars_without_lsbs(3)+']'
-BASE32CHAR_1bits = '['+get_trailing_chars_without_lsbs(4)+']'
-BASE32STR_1byte = BASE32CHAR+BASE32CHAR_3bits
-BASE32STR_2bytes = BASE32CHAR+'{3}'+BASE32CHAR_1bits
-BASE32STR_3bytes = BASE32CHAR+'{4}'+BASE32CHAR_4bits
-BASE32STR_4bytes = BASE32CHAR+'{6}'+BASE32CHAR_2bits
-BASE32STR_anybytes = '((?:%s{8})*' % (BASE32CHAR,) + "(?:|%s|%s|%s|%s))" % (BASE32STR_1byte, BASE32STR_2bytes, BASE32STR_3bytes, BASE32STR_4bytes)
+BASE32CHAR = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(0)+b']')
+BASE32CHAR_4bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(1)+b']')
+BASE32CHAR_3bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(2)+b']')
+BASE32CHAR_2bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(3)+b']')
+BASE32CHAR_1bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(4)+b']')
+BASE32STR_1byte = backwardscompat_bytes(BASE32CHAR+BASE32CHAR_3bits)
+BASE32STR_2bytes = backwardscompat_bytes(BASE32CHAR+b'{3}'+BASE32CHAR_1bits)
+BASE32STR_3bytes = backwardscompat_bytes(BASE32CHAR+b'{4}'+BASE32CHAR_4bits)
+BASE32STR_4bytes = backwardscompat_bytes(BASE32CHAR+b'{6}'+BASE32CHAR_2bits)
+BASE32STR_anybytes = backwardscompat_bytes(bytes(b'((?:%s{8})*') % (BASE32CHAR,) + bytes(b"(?:|%s|%s|%s|%s))") % (BASE32STR_1byte, BASE32STR_2bytes, BASE32STR_3bytes, BASE32STR_4bytes))

 def b2a(os):
    """
-    @param os the data to be encoded (a string)
+    @param os the data to be encoded (as bytes)

-    @return the contents of os in base-32 encoded form
+    @return the contents of os in base-32 encoded form, as bytes
    """
-    return _b2a_l(os, len(os)*8)
+    return base64.b32encode(os).rstrip(b"=").lower()

 def b2a_or_none(os):
    if os is not None:
        return b2a(os)

-def _b2a_l(os, lengthinbits):
-    """
-    @param os the data to be encoded (a string)
-    @param lengthinbits the number of bits of data in os to be encoded
-
-    b2a_l() will generate a base-32 encoded string big enough to encode lengthinbits bits.  So for
-    example if os is 2 bytes long and lengthinbits is 15, then b2a_l() will generate a 3-character-
-    long base-32 encoded string (since 3 quintets is sufficient to encode 15 bits).  If os is
-    2 bytes long and lengthinbits is 16 (or None), then b2a_l() will generate a 4-character string.
-    Note that b2a_l() does not mask off unused least-significant bits, so for example if os is
-    2 bytes long and lengthinbits is 15, then you must ensure that the unused least-significant bit
-    of os is a zero bit or you will get the wrong result.  This precondition is tested by assertions
-    if assertions are enabled.
-
-    Warning: if you generate a base-32 encoded string with b2a_l(), and then someone else tries to
-    decode it by calling a2b() instead of  a2b_l(), then they will (probably) get a different
-    string than the one you encoded!  So only use b2a_l() when you are sure that the encoding and
-    decoding sides know exactly which lengthinbits to use.  If you do not have a way for the
-    encoder and the decoder to agree upon the lengthinbits, then it is best to use b2a() and
-    a2b().  The only drawback to using b2a() over b2a_l() is that when you have a number of
-    bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-32 encoded
-    string that is one or two characters longer than necessary.
-
-    @return the contents of os in base-32 encoded form
-    """
-    precondition(isinstance(lengthinbits, (int, long,)), "lengthinbits is required to be an integer.", lengthinbits=lengthinbits)
-    precondition((lengthinbits+7)/8 == len(os), "lengthinbits is required to specify a number of bits storable in exactly len(os) octets.", lengthinbits=lengthinbits, lenos=len(os))
-
-    os = map(ord, os)
-
-    numquintets = (lengthinbits+4)/5
-    numoctetsofdata = (lengthinbits+7)/8
-    # print "numoctetsofdata: %s, len(os): %s, lengthinbits: %s, numquintets: %s" % (numoctetsofdata, len(os), lengthinbits, numquintets,)
-    # strip trailing octets that won't be used
-    del os[numoctetsofdata:]
-    # zero out any unused bits in the final octet
-    if lengthinbits % 8 != 0:
-        os[-1] = os[-1] >> (8-(lengthinbits % 8))
-        os[-1] = os[-1] << (8-(lengthinbits % 8))
-    # append zero octets for padding if needed
-    numoctetsneeded = (numquintets*5+7)/8 + 1
-    os.extend([0]*(numoctetsneeded-len(os)))
-
-    quintets = []
-    cutoff = 256
-    num = os[0]
-    i = 0
-    while len(quintets) < numquintets:
-        i = i + 1
-        assert len(os) > i, "len(os): %s, i: %s, len(quintets): %s, numquintets: %s, lengthinbits: %s, numoctetsofdata: %s, numoctetsneeded: %s, os: %s" % (len(os), i, len(quintets), numquintets, lengthinbits, numoctetsofdata, numoctetsneeded, os,)
-        num = num * 256
-        num = num + os[i]
-        if cutoff == 1:
-            cutoff = 256
-            continue
-        cutoff = cutoff * 8
-        quintet = num / cutoff
-        quintets.append(quintet)
-        num = num - (quintet * cutoff)
-
-        cutoff = cutoff / 32
-        quintet = num / cutoff
-        quintets.append(quintet)
-        num = num - (quintet * cutoff)
-
-    if len(quintets) > numquintets:
-        assert len(quintets) == (numquintets+1), "len(quintets): %s, numquintets: %s, quintets: %s" % (len(quintets), numquintets, quintets,)
-        quintets = quintets[:numquintets]
-    res = string.translate(string.join(map(chr, quintets), ''), v2ctranstable)
-    assert could_be_base32_encoded_l(res, lengthinbits), "lengthinbits: %s, res: %s" % (lengthinbits, res,)
-    return res
-
 # b2a() uses the minimal number of quintets sufficient to encode the binary
 # input.  It just so happens that the relation is like this (everything is
 # modulo 40 bits).
@@ -144,7 +97,9 @@ NUM_OS_TO_NUM_QS=(0, 2, 4, 5, 7,)

 NUM_QS_TO_NUM_OS=(0, 1, 1, 2, 2, 3, 3, 4)
 NUM_QS_LEGIT=(1, 0, 1, 0, 1, 1, 0, 1,)
-NUM_QS_TO_NUM_BITS=tuple(map(lambda x: x*8, NUM_QS_TO_NUM_OS))
+NUM_QS_TO_NUM_BITS=tuple([_x*8 for _x in NUM_QS_TO_NUM_OS])
+if PY2:
+    del _x

 # A fast way to determine whether a given string *could* be base-32 encoded data, assuming that the
 # original data had 8K bits for a positive integer K.
@@ -152,8 +107,8 @@ NUM_QS_TO_NUM_BITS=tuple(map(lambda x: x*8, NUM_QS_TO_NUM_OS))
 # tells whether the final character is reasonable.
 def add_check_array(cs, sfmap):
    checka=[0] * 256
-    for c in cs:
-        checka[ord(c)] = 1
+    for c in bytes(cs):
+        checka[c] = 1
    sfmap.append(tuple(checka))

 def init_s8():
@@ -163,106 +118,29 @@ def init_s8():
        if NUM_QS_LEGIT[lenmod8]:
            add_check_array(get_trailing_chars_without_lsbs(4-(NUM_QS_TO_NUM_BITS[lenmod8]%5)), s8)
        else:
-            add_check_array('', s8)
+            add_check_array(b'', s8)
    return tuple(s8)
 s8 = init_s8()

-# A somewhat fast way to determine whether a given string *could* be base-32 encoded data, given a
-# lengthinbits.
-# The boolean value of s5[lengthinbits%5][ord(s[-1])], where s is the possibly base-32 encoded
-# string tells whether the final character is reasonable.
-def init_s5():
-    s5 = []
-    add_check_array(get_trailing_chars_without_lsbs(0), s5)
-    for lenmod5 in [1,2,3,4]:
-        add_check_array(get_trailing_chars_without_lsbs(5-lenmod5), s5)
-    return tuple(s5)
-s5 = init_s5()
-
-def could_be_base32_encoded(s, s8=s8, tr=string.translate, identitytranstable=identitytranstable, chars=chars):
-    precondition(isinstance(s, six.binary_type), s)
-    if s == '':
+def could_be_base32_encoded(s, s8=s8, tr=bytes.translate, identitytranstable=identitytranstable, chars=chars):
+    precondition(isinstance(s, bytes), s)
+    if s == b'':
        return True
-    return s8[len(s)%8][ord(s[-1])] and not tr(s, identitytranstable, chars)
-
-def could_be_base32_encoded_l(s, lengthinbits, s5=s5, tr=string.translate, identitytranstable=identitytranstable, chars=chars):
-    precondition(isinstance(s, six.binary_type), s)
-    if s == '':
-        return True
-    assert lengthinbits%5 < len(s5), lengthinbits
-    assert ord(s[-1]) < s5[lengthinbits%5]
-    return (((lengthinbits+4)/5) == len(s)) and s5[lengthinbits%5][ord(s[-1])] and not string.translate(s, identitytranstable, chars)
-
-def num_octets_that_encode_to_this_many_quintets(numqs):
-    # Here is a computation that conveniently expresses this:
-    return (numqs*5+3)/8
+    s = bytes(s)  # On Python 2, make sure we're using modern bytes
+    return s8[len(s)%8][s[-1]] and not tr(s, identitytranstable, chars)

 def a2b(cs):
    """
-    @param cs the base-32 encoded data (a string)
+    @param cs the base-32 encoded data (as bytes)
    """
    precondition(could_be_base32_encoded(cs), "cs is required to be possibly base32 encoded data.", cs=cs)
-    precondition(isinstance(cs, six.binary_type), cs)
+    precondition(isinstance(cs, bytes), cs)

-    return _a2b_l(cs, num_octets_that_encode_to_this_many_quintets(len(cs))*8)
-
-def _a2b_l(cs, lengthinbits):
-    """
-    @param lengthinbits the number of bits of data in encoded into cs
-
-    a2b_l() will return a result big enough to hold lengthinbits bits.  So for example if cs is
-    4 characters long (encoding at least 15 and up to 20 bits) and lengthinbits is 16, then a2b_l()
-    will return a string of length 2 (since 2 bytes is sufficient to store 16 bits).  If cs is 4
-    characters long and lengthinbits is 20, then a2b_l() will return a string of length 3 (since
-    3 bytes is sufficient to store 20 bits).  Note that b2a_l() does not mask off unused least-
-    significant bits, so for example if cs is 4 characters long and lengthinbits is 17, then you
-    must ensure that all three of the unused least-significant bits of cs are zero bits or you will
-    get the wrong result.  This precondition is tested by assertions if assertions are enabled.
-    (Generally you just require the encoder to ensure this consistency property between the least
-    significant zero bits and value of lengthinbits, and reject strings that have a length-in-bits
-    which isn't a multiple of 8 and yet don't have trailing zero bits, as improperly encoded.)
-
-    Please see the warning in the docstring of b2a_l() regarding the use of b2a() versus b2a_l().
-
-    @return the data encoded in cs
-    """
-    precondition(could_be_base32_encoded_l(cs, lengthinbits), "cs is required to be possibly base32 encoded data.", cs=cs, lengthinbits=lengthinbits)
-    precondition(isinstance(cs, six.binary_type), cs)
-    if cs == '':
-        return ''
-
-    qs = map(ord, string.translate(cs, c2vtranstable))
-
-    numoctets = (lengthinbits+7)/8
-    numquintetsofdata = (lengthinbits+4)/5
-    # strip trailing quintets that won't be used
-    del qs[numquintetsofdata:]
-    # zero out any unused bits in the final quintet
-    if lengthinbits % 5 != 0:
-        qs[-1] = qs[-1] >> (5-(lengthinbits % 5))
-        qs[-1] = qs[-1] << (5-(lengthinbits % 5))
-    # append zero quintets for padding if needed
-    numquintetsneeded = (numoctets*8+4)/5
-    qs.extend([0]*(numquintetsneeded-len(qs)))
-
-    octets = []
-    pos = 2048
-    num = qs[0] * pos
-    i = 1
-    while len(octets) < numoctets:
-        while pos > 256:
-            pos = pos / 32
-            num = num + (qs[i] * pos)
-            i = i + 1
-        octet = num / 256
-        octets.append(octet)
-        num = num - (octet * 256)
-        num = num * 256
-        pos = pos * 256
-    assert len(octets) == numoctets, "len(octets): %s, numoctets: %s, octets: %s" % (len(octets), numoctets, octets,)
-    res = ''.join(map(chr, octets))
-    precondition(_b2a_l(res, lengthinbits) == cs, "cs is required to be the canonical base-32 encoding of some data.", b2a(res), res=res, cs=cs)
-    return res
+    cs = cs.upper()
+    # Add padding back, to make Python's base64 module happy:
+    while (len(cs) * 5) % 8 != 0:
+        cs += b"="
+    return base64.b32decode(cs)


 __all__ = ["b2a", "a2b", "b2a_or_none", "BASE32CHAR_3bits", "BASE32CHAR_1bits", "BASE32CHAR", "BASE32STR_anybytes", "could_be_base32_encoded"]
--- a/src/allmydata/util/base62.py
+++ b/src/allmydata/util/base62.py
@@ -1,22 +1,43 @@
-# from the Python Standard Library
-import string
+"""
+Base62 encoding.
+
+Ported to Python 3.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401
+
+if PY2:
+    import string
+    maketrans = string.maketrans
+    translate = string.translate
+else:
+    maketrans = bytes.maketrans
+    translate = bytes.translate
+
+from past.builtins import chr as byteschr

 from allmydata.util.mathutil import log_ceil, log_floor

-chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
+chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

-BASE62CHAR = '[' + chars + ']'
+BASE62CHAR = b'[' + chars + b']'

-vals = ''.join([chr(i) for i in range(62)])
-c2vtranstable = string.maketrans(chars, vals)
-v2ctranstable = string.maketrans(vals, chars)
-identitytranstable = string.maketrans(chars, chars)
+vals = b''.join([byteschr(i) for i in range(62)])
+c2vtranstable = maketrans(chars, vals)
+v2ctranstable = maketrans(vals, chars)
+identitytranstable = maketrans(chars, chars)

 def b2a(os):
    """
-    @param os the data to be encoded (a string)
+    @param os the data to be encoded (as bytes)

-    @return the contents of os in base-62 encoded form
+    @return the contents of os in base-62 encoded form, as bytes
    """
    cs = b2a_l(os, len(os)*8)
    assert num_octets_that_encode_to_this_many_chars(len(cs)) == len(os), "%s != %s, numchars: %s" % (num_octets_that_encode_to_this_many_chars(len(cs)), len(os), len(cs))
@@ -24,7 +45,7 @@ def b2a(os):

 def b2a_l(os, lengthinbits):
    """
-    @param os the data to be encoded (a string)
+    @param os the data to be encoded (as bytes)
    @param lengthinbits the number of bits of data in os to be encoded

    b2a_l() will generate a base-62 encoded string big enough to encode
@@ -45,9 +66,11 @@ def b2a_l(os, lengthinbits):
    bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-62 encoded
    string that is one or two characters longer than necessary.

-    @return the contents of os in base-62 encoded form
+    @return the contents of os in base-62 encoded form, as bytes
    """
-    os = [ord(o) for o in reversed(os)] # treat os as big-endian -- and we want to process the least-significant o first
+    # We call bytes() again for Python 2, to ensure literals are using future's
+    # Python 3-compatible variant.
+    os = [o for o in reversed(bytes(os))] # treat os as big-endian -- and we want to process the least-significant o first

    value = 0
    numvalues = 1 # the number of possible values that value could be
@@ -62,7 +85,7 @@ def b2a_l(os, lengthinbits):
        value //= 62
        numvalues //= 62

-    return string.translate(''.join([chr(c) for c in reversed(chars)]), v2ctranstable) # make it big-endian
+    return translate(bytes([c for c in reversed(chars)]), v2ctranstable) # make it big-endian

 def num_octets_that_encode_to_this_many_chars(numcs):
    return log_floor(62**numcs, 256)
@@ -89,9 +112,11 @@ def a2b_l(cs, lengthinbits):
    Please see the warning in the docstring of b2a_l() regarding the use of
    b2a() versus b2a_l().

-    @return the data encoded in cs
+    @return the data encoded in cs, as bytes
    """
-    cs = [ord(c) for c in reversed(string.translate(cs, c2vtranstable))] # treat cs as big-endian -- and we want to process the least-significant c first
+    # We call bytes() again for Python 2, to ensure literals are using future's
+    # Python 3-compatible variant.
+    cs = [c for c in reversed(bytes(translate(cs, c2vtranstable)))] # treat cs as big-endian -- and we want to process the least-significant c first

    value = 0
    numvalues = 1 # the number of possible values that value could be
@@ -101,10 +126,10 @@ def a2b_l(cs, lengthinbits):
        numvalues *= 62

    numvalues = 2**lengthinbits
-    bytes = []
+    result_bytes = []
    while numvalues > 1:
-        bytes.append(value % 256)
+        result_bytes.append(value % 256)
        value //= 256
        numvalues //= 256

-    return ''.join([chr(b) for b in reversed(bytes)]) # make it big-endian
+    return bytes([b for b in reversed(result_bytes)]) # make it big-endian