mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-02-20 17:52:50 +00:00
Merge remote-tracking branch 'origin/master' into 3351.spans-python-3
This commit is contained in:
commit
2391bd9733
@ -1,4 +1,19 @@
|
||||
allmydata.test.mutable.test_exceptions.Exceptions.test_repr
|
||||
allmydata.test.test_base32.Base32.test_a2b
|
||||
allmydata.test.test_base32.Base32.test_a2b_b2a_match_Pythons
|
||||
allmydata.test.test_base32.Base32.test_b2a
|
||||
allmydata.test.test_base32.Base32.test_b2a_or_none
|
||||
allmydata.test.test_base62.Base62.test_ende_0x00
|
||||
allmydata.test.test_base62.Base62.test_ende_0x000000
|
||||
allmydata.test.test_base62.Base62.test_ende_0x01
|
||||
allmydata.test.test_base62.Base62.test_ende_0x0100
|
||||
allmydata.test.test_base62.Base62.test_ende_0x010000
|
||||
allmydata.test.test_base62.Base62.test_ende_longrandstr
|
||||
allmydata.test.test_base62.Base62.test_ende_randstr
|
||||
allmydata.test.test_base62.Base62.test_known_values
|
||||
allmydata.test.test_base62.Base62.test_num_octets_that_encode_to_this_many_chars
|
||||
allmydata.test.test_base62.Base62.test_odd_sizes
|
||||
allmydata.test.test_base62.Base62.test_roundtrip
|
||||
allmydata.test.test_deferredutil.DeferredUtilTests.test_failure
|
||||
allmydata.test.test_deferredutil.DeferredUtilTests.test_gather_results
|
||||
allmydata.test.test_deferredutil.DeferredUtilTests.test_success
|
||||
|
0
newsfragments/3343.minor
Normal file
0
newsfragments/3343.minor
Normal file
@ -1,6 +1,16 @@
|
||||
"""
|
||||
Tests for allmydata.util.base32.
|
||||
|
||||
Ported to Python 3.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from future.utils import PY2
|
||||
if PY2:
|
||||
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min # noqa: F401
|
||||
|
||||
import base64
|
||||
|
||||
@ -22,7 +32,9 @@ class Base32(unittest.TestCase):
|
||||
self.failUnlessEqual(encoded, x)
|
||||
self.assertIsInstance(encoded, bytes)
|
||||
self.assertTrue(base32.could_be_base32_encoded(encoded))
|
||||
self.assertEqual(base32.a2b(encoded), input_bytes)
|
||||
decoded = base32.a2b(encoded)
|
||||
self.assertEqual(decoded, input_bytes)
|
||||
self.assertIsInstance(decoded, bytes)
|
||||
|
||||
def test_b2a(self):
|
||||
self.failUnlessEqual(base32.b2a(b"\x12\x34"), b"ci2a")
|
||||
|
@ -1,7 +1,22 @@
|
||||
import random, unittest
|
||||
"""
|
||||
Tests for allmydata.util.base62.
|
||||
|
||||
Ported to Python 3.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from future.utils import PY2
|
||||
if PY2:
|
||||
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min # noqa: F401
|
||||
|
||||
from past.builtins import chr as byteschr
|
||||
|
||||
import random, unittest
|
||||
|
||||
from hypothesis import (
|
||||
strategies as st,
|
||||
given,
|
||||
@ -10,20 +25,41 @@ from hypothesis import (
|
||||
from allmydata.util import base62, mathutil
|
||||
|
||||
def insecurerandstr(n):
    """
    Return ``n`` pseudo-random bytes drawn from the ``random`` module.

    The ``random`` module is not a cryptographic source; this helper is only
    meant for generating test inputs.

    @param n the number of bytes to generate

    @return a bytes object of length n
    """
    # A list of ints (rather than a generator) is handed to bytes(), as in
    # the ported original.
    return bytes([random.randrange(0, 256) for _ in range(n)])
|
||||
|
||||
class T(unittest.TestCase):
|
||||
class Base62(unittest.TestCase):
|
||||
def _test_num_octets_that_encode_to_this_many_chars(self, chars, octets):
    """
    Assert that base62.num_octets_that_encode_to_this_many_chars(chars)
    equals ``octets``.

    @param chars the number of base-62 characters
    @param octets the expected number of octets
    """
    actual = base62.num_octets_that_encode_to_this_many_chars(chars)
    assert actual == octets, "%s != %s <- %s" % (octets, actual, chars)
|
||||
|
||||
def _test_ende(self, bs):
|
||||
ascii=base62.b2a(bs)
|
||||
bs2=base62.a2b(ascii)
|
||||
assert bs2 == bs, "bs2: %s:%s, bs: %s:%s, ascii: %s:%s" % (len(bs2), repr(bs2), len(bs), repr(bs), len(ascii), repr(ascii))
|
||||
def _test_roundtrip(self, bs):
    """
    Encode ``bs`` with base62.b2a(), decode the result with base62.a2b(),
    and check that the round trip is lossless and bytes-typed throughout.

    @param bs the data to round-trip (as bytes)
    """
    encoded = base62.b2a(bs)
    decoded = base62.a2b(encoded)
    self.assertEqual(decoded, bs)
    for value in (encoded, bs, decoded):
        self.assertIsInstance(value, bytes)
    # Encoded string only uses values from the base62 allowed characters:
    self.assertFalse(set(encoded) - set(base62.chars))
|
||||
|
||||
@given(input_bytes=st.binary(max_size=100))
def test_roundtrip(self, input_bytes):
    """
    Property test: binary inputs of up to 100 bytes generated by Hypothesis
    must survive the checks in _test_roundtrip().
    """
    self._test_roundtrip(input_bytes)
|
||||
|
||||
def test_known_values(self):
    """Known values to ensure the algorithm hasn't changed."""
    known_pairs = [
        (b"hello", b'7tQLFHz'),
        (b"", b'0'),
        (b"zzz", b'0Xg7e'),
        (b"\x36\xffWAT", b'49pq4mq'),
        (b"1234 22323", b'1A0afZe9mxSZpz'),
        (b"______", b'0TmAuCHJX'),
    ]
    for plaintext, encoded in known_pairs:
        # Each pinned pair is checked in both directions.
        self.assertEqual(encoded, base62.b2a(plaintext))
        self.assertEqual(plaintext, base62.a2b(encoded))
|
||||
|
||||
def test_num_octets_that_encode_to_this_many_chars(self):
|
||||
return self._test_num_octets_that_encode_to_this_many_chars(2, 1)
|
||||
@ -32,25 +68,25 @@ class T(unittest.TestCase):
|
||||
return self._test_num_octets_that_encode_to_this_many_chars(6, 4)
|
||||
|
||||
def test_ende_0x00(self):
    """Round-trip a single zero byte."""
    return self._test_roundtrip(b'\x00')
|
||||
|
||||
def test_ende_0x01(self):
    """Round-trip the single byte 0x01."""
    return self._test_roundtrip(b'\x01')
|
||||
|
||||
def test_ende_0x0100(self):
    """Round-trip the two bytes 0x01 0x00."""
    return self._test_roundtrip(b'\x01\x00')
|
||||
|
||||
def test_ende_0x000000(self):
    """Round-trip three zero bytes."""
    return self._test_roundtrip(b'\x00\x00\x00')
|
||||
|
||||
def test_ende_0x010000(self):
    """Round-trip the three bytes 0x01 0x00 0x00."""
    return self._test_roundtrip(b'\x01\x00\x00')
|
||||
|
||||
def test_ende_randstr(self):
    """Round-trip 16 bytes produced by insecurerandstr()."""
    return self._test_roundtrip(insecurerandstr(2**4))
|
||||
|
||||
def test_ende_longrandstr(self):
    """
    Round-trip a string from insecurerandstr() whose length is itself
    chosen at random from range(0, 2**10).
    """
    return self._test_roundtrip(insecurerandstr(random.randrange(0, 2**10)))
|
||||
|
||||
def test_odd_sizes(self):
|
||||
for j in range(2**6):
|
||||
@ -59,19 +95,12 @@ class T(unittest.TestCase):
|
||||
bs = insecurerandstr(numos)
|
||||
# zero-out unused least-sig bits
|
||||
if lib%8:
|
||||
b=ord(bs[-1])
|
||||
b = ord(bs[-1:])
|
||||
b = b >> (8 - (lib%8))
|
||||
b = b << (8 - (lib%8))
|
||||
bs = bs[:-1] + chr(b)
|
||||
bs = bs[:-1] + byteschr(b)
|
||||
asl = base62.b2a_l(bs, lib)
|
||||
assert len(asl) == base62.num_chars_that_this_many_octets_encode_to(numos) # the size of the base-62 encoding must be just right
|
||||
bs2l = base62.a2b_l(asl, lib)
|
||||
assert len(bs2l) == numos # the size of the result must be just right
|
||||
assert bs == bs2l
|
||||
|
||||
def suite():
|
||||
suite = unittest.makeSuite(T, 'test')
|
||||
return suite
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
@ -16,6 +16,8 @@ if PY2:
|
||||
# Keep these sorted alphabetically, to reduce merge conflicts:
|
||||
PORTED_MODULES = [
|
||||
"allmydata.util.assertutil",
|
||||
"allmydata.util.base32",
|
||||
"allmydata.util.base62",
|
||||
"allmydata.util.deferredutil",
|
||||
"allmydata.util.humanreadable",
|
||||
"allmydata.util.mathutil",
|
||||
@ -26,8 +28,15 @@ PORTED_MODULES = [
|
||||
]
|
||||
|
||||
# Test modules handed to ``trial`` when this file is run as a script.
PORTED_TEST_MODULES = [
    "allmydata.test.test_base32",
    "allmydata.test.test_base62",
    "allmydata.test.test_deferredutil",
    "allmydata.test.test_humanreadable",
    "allmydata.test.test_python3",
    "allmydata.test.test_spans",
]
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run every ported test module through the ``trial`` command.
    # check_call() raises CalledProcessError if the command exits non-zero.
    from subprocess import check_call
    check_call(["trial"] + PORTED_TEST_MODULES)
|
||||
|
@ -1,17 +1,42 @@
|
||||
# from the Python Standard Library
|
||||
import six
|
||||
import string
|
||||
"""
|
||||
Base32 encoding.
|
||||
|
||||
Ported to Python 3.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from future.utils import PY2
|
||||
if PY2:
|
||||
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min # noqa: F401
|
||||
|
||||
if PY2:
    def backwardscompat_bytes(b):
        """
        Replace Future bytes with native Python 2 bytes, so % works
        consistently until other modules are ported.
        """
        # Objects without a __native__ attribute are returned unchanged.
        return getattr(b, "__native__", lambda: b)()
    import string
    maketrans = string.maketrans
else:
    def backwardscompat_bytes(b):
        """
        Return ``b`` unchanged; no conversion is done on Python 3.
        """
        return b
    maketrans = bytes.maketrans
|
||||
|
||||
import base64
|
||||
|
||||
from allmydata.util.assertutil import precondition
|
||||
|
||||
# NOTE(review): z_base_32_alphabet is not referenced by any visible code in
# this module; it is kept so existing importers of the name keep working.
z_base_32_alphabet = "ybndrfg8ejkmcpqxot1uwisza345h769" # Zooko's choice, rationale in "DESIGN" doc
rfc3548_alphabet = b"abcdefghijklmnopqrstuvwxyz234567" # RFC3548 standard used by Gnutella, Content-Addressable Web, THEX, Bitzi, Web-Calculus...
chars = rfc3548_alphabet

# The 32 byte values 0..31, one per alphabet character.
vals = backwardscompat_bytes(bytes(range(32)))
# Translation tables: alphabet character -> value, value -> alphabet
# character, and an identity table built from empty arguments.
c2vtranstable = maketrans(chars, vals)
v2ctranstable = maketrans(vals, chars)
identitytranstable = maketrans(b'', b'')
|
||||
|
||||
def _get_trailing_chars_without_lsbs(N, d):
|
||||
"""
|
||||
@ -22,9 +47,9 @@ def _get_trailing_chars_without_lsbs(N, d):
|
||||
s.extend(_get_trailing_chars_without_lsbs(N+1, d=d))
|
||||
i = 0
|
||||
while i < len(chars):
|
||||
if not d.has_key(i):
|
||||
if i not in d:
|
||||
d[i] = None
|
||||
s.append(chars[i])
|
||||
s.append(chars[i:i+1])
|
||||
i = i + 2**N
|
||||
return s
|
||||
|
||||
@ -33,103 +58,31 @@ def get_trailing_chars_without_lsbs(N):
|
||||
if N == 0:
|
||||
return chars
|
||||
d = {}
|
||||
return ''.join(_get_trailing_chars_without_lsbs(N, d=d))
|
||||
return b''.join(_get_trailing_chars_without_lsbs(N, d=d))
|
||||
|
||||
# Regular-expression fragments, as bytes. Each BASE32CHAR* value is a
# character class built from get_trailing_chars_without_lsbs(N).
# NOTE(review): the "_Nbits" suffix presumably names how many significant
# bits a final character may carry -- confirm against
# get_trailing_chars_without_lsbs().
BASE32CHAR = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(0)+b']')
BASE32CHAR_4bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(1)+b']')
BASE32CHAR_3bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(2)+b']')
BASE32CHAR_2bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(3)+b']')
BASE32CHAR_1bits = backwardscompat_bytes(b'['+get_trailing_chars_without_lsbs(4)+b']')
# Patterns for the trailing partial group of an encoding of 1..4 bytes.
BASE32STR_1byte = backwardscompat_bytes(BASE32CHAR+BASE32CHAR_3bits)
BASE32STR_2bytes = backwardscompat_bytes(BASE32CHAR+b'{3}'+BASE32CHAR_1bits)
BASE32STR_3bytes = backwardscompat_bytes(BASE32CHAR+b'{4}'+BASE32CHAR_4bits)
BASE32STR_4bytes = backwardscompat_bytes(BASE32CHAR+b'{6}'+BASE32CHAR_2bits)
# Any number of full 8-character groups, followed by an optional trailing
# partial group.
BASE32STR_anybytes = backwardscompat_bytes(bytes(b'((?:%s{8})*') % (BASE32CHAR,) + bytes(b"(?:|%s|%s|%s|%s))") % (BASE32STR_1byte, BASE32STR_2bytes, BASE32STR_3bytes, BASE32STR_4bytes))
|
||||
|
||||
def b2a(os):
    """
    @param os the data to be encoded (as bytes)

    @return the contents of os in base-32 encoded form, as bytes

    The result is the standard library's base32 encoding of os, with the
    trailing "=" padding stripped and the letters lowercased.
    """
    return base64.b32encode(os).rstrip(b"=").lower()
|
||||
|
||||
def b2a_or_none(os):
    """
    @param os the data to be encoded (as bytes), or None

    @return None if os is None, otherwise b2a(os)
    """
    if os is None:
        return None
    return b2a(os)
|
||||
|
||||
def _b2a_l(os, lengthinbits):
|
||||
"""
|
||||
@param os the data to be encoded (a string)
|
||||
@param lengthinbits the number of bits of data in os to be encoded
|
||||
|
||||
b2a_l() will generate a base-32 encoded string big enough to encode lengthinbits bits. So for
|
||||
example if os is 2 bytes long and lengthinbits is 15, then b2a_l() will generate a 3-character-
|
||||
long base-32 encoded string (since 3 quintets is sufficient to encode 15 bits). If os is
|
||||
2 bytes long and lengthinbits is 16 (or None), then b2a_l() will generate a 4-character string.
|
||||
Note that b2a_l() does not mask off unused least-significant bits, so for example if os is
|
||||
2 bytes long and lengthinbits is 15, then you must ensure that the unused least-significant bit
|
||||
of os is a zero bit or you will get the wrong result. This precondition is tested by assertions
|
||||
if assertions are enabled.
|
||||
|
||||
Warning: if you generate a base-32 encoded string with b2a_l(), and then someone else tries to
|
||||
decode it by calling a2b() instead of a2b_l(), then they will (probably) get a different
|
||||
string than the one you encoded! So only use b2a_l() when you are sure that the encoding and
|
||||
decoding sides know exactly which lengthinbits to use. If you do not have a way for the
|
||||
encoder and the decoder to agree upon the lengthinbits, then it is best to use b2a() and
|
||||
a2b(). The only drawback to using b2a() over b2a_l() is that when you have a number of
|
||||
bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-32 encoded
|
||||
string that is one or two characters longer than necessary.
|
||||
|
||||
@return the contents of os in base-32 encoded form
|
||||
"""
|
||||
precondition(isinstance(lengthinbits, (int, long,)), "lengthinbits is required to be an integer.", lengthinbits=lengthinbits)
|
||||
precondition((lengthinbits+7)/8 == len(os), "lengthinbits is required to specify a number of bits storable in exactly len(os) octets.", lengthinbits=lengthinbits, lenos=len(os))
|
||||
|
||||
os = map(ord, os)
|
||||
|
||||
numquintets = (lengthinbits+4)/5
|
||||
numoctetsofdata = (lengthinbits+7)/8
|
||||
# print "numoctetsofdata: %s, len(os): %s, lengthinbits: %s, numquintets: %s" % (numoctetsofdata, len(os), lengthinbits, numquintets,)
|
||||
# strip trailing octets that won't be used
|
||||
del os[numoctetsofdata:]
|
||||
# zero out any unused bits in the final octet
|
||||
if lengthinbits % 8 != 0:
|
||||
os[-1] = os[-1] >> (8-(lengthinbits % 8))
|
||||
os[-1] = os[-1] << (8-(lengthinbits % 8))
|
||||
# append zero octets for padding if needed
|
||||
numoctetsneeded = (numquintets*5+7)/8 + 1
|
||||
os.extend([0]*(numoctetsneeded-len(os)))
|
||||
|
||||
quintets = []
|
||||
cutoff = 256
|
||||
num = os[0]
|
||||
i = 0
|
||||
while len(quintets) < numquintets:
|
||||
i = i + 1
|
||||
assert len(os) > i, "len(os): %s, i: %s, len(quintets): %s, numquintets: %s, lengthinbits: %s, numoctetsofdata: %s, numoctetsneeded: %s, os: %s" % (len(os), i, len(quintets), numquintets, lengthinbits, numoctetsofdata, numoctetsneeded, os,)
|
||||
num = num * 256
|
||||
num = num + os[i]
|
||||
if cutoff == 1:
|
||||
cutoff = 256
|
||||
continue
|
||||
cutoff = cutoff * 8
|
||||
quintet = num / cutoff
|
||||
quintets.append(quintet)
|
||||
num = num - (quintet * cutoff)
|
||||
|
||||
cutoff = cutoff / 32
|
||||
quintet = num / cutoff
|
||||
quintets.append(quintet)
|
||||
num = num - (quintet * cutoff)
|
||||
|
||||
if len(quintets) > numquintets:
|
||||
assert len(quintets) == (numquintets+1), "len(quintets): %s, numquintets: %s, quintets: %s" % (len(quintets), numquintets, quintets,)
|
||||
quintets = quintets[:numquintets]
|
||||
res = string.translate(string.join(map(chr, quintets), ''), v2ctranstable)
|
||||
assert could_be_base32_encoded_l(res, lengthinbits), "lengthinbits: %s, res: %s" % (lengthinbits, res,)
|
||||
return res
|
||||
|
||||
# b2a() uses the minimal number of quintets sufficient to encode the binary
|
||||
# input. It just so happens that the relation is like this (everything is
|
||||
# modulo 40 bits).
|
||||
@ -144,7 +97,9 @@ NUM_OS_TO_NUM_QS=(0, 2, 4, 5, 7,)
|
||||
|
||||
# Lookup tables indexed by (number of quintets) modulo 8.
NUM_QS_TO_NUM_OS=(0, 1, 1, 2, 2, 3, 3, 4)
NUM_QS_LEGIT=(1, 0, 1, 0, 1, 1, 0, 1,)
# A generator expression keeps its loop variable out of module scope on both
# Python 2 and Python 3, so no cleanup of a temporary name is needed.
NUM_QS_TO_NUM_BITS = tuple(num_os * 8 for num_os in NUM_QS_TO_NUM_OS)
|
||||
|
||||
# A fast way to determine whether a given string *could* be base-32 encoded data, assuming that the
|
||||
# original data had 8K bits for a positive integer K.
|
||||
@ -152,8 +107,8 @@ NUM_QS_TO_NUM_BITS=tuple(map(lambda x: x*8, NUM_QS_TO_NUM_OS))
|
||||
# tells whether the final character is reasonable.
|
||||
def add_check_array(cs, sfmap):
    """
    Append to sfmap a 256-entry tuple of flags: entry i is 1 if byte value i
    occurs in cs, and 0 otherwise.

    @param cs the byte values to flag (as bytes)
    @param sfmap the list that receives the new tuple (mutated in place)
    """
    checka = [0] * 256
    # Iterating over bytes(cs) yields integers, which index the flag list
    # directly.
    for c in bytes(cs):
        checka[c] = 1
    sfmap.append(tuple(checka))
|
||||
|
||||
def init_s8():
|
||||
@ -163,106 +118,29 @@ def init_s8():
|
||||
if NUM_QS_LEGIT[lenmod8]:
|
||||
add_check_array(get_trailing_chars_without_lsbs(4-(NUM_QS_TO_NUM_BITS[lenmod8]%5)), s8)
|
||||
else:
|
||||
add_check_array('', s8)
|
||||
add_check_array(b'', s8)
|
||||
return tuple(s8)
|
||||
s8 = init_s8()
|
||||
|
||||
# A somewhat fast way to determine whether a given string *could* be base-32 encoded data, given a
|
||||
# lengthinbits.
|
||||
# The boolean value of s5[lengthinbits%5][ord(s[-1])], where s is the possibly base-32 encoded
|
||||
# string tells whether the final character is reasonable.
|
||||
def init_s5():
|
||||
s5 = []
|
||||
add_check_array(get_trailing_chars_without_lsbs(0), s5)
|
||||
for lenmod5 in [1,2,3,4]:
|
||||
add_check_array(get_trailing_chars_without_lsbs(5-lenmod5), s5)
|
||||
return tuple(s5)
|
||||
s5 = init_s5()
|
||||
|
||||
def could_be_base32_encoded(s, s8=s8, tr=string.translate, identitytranstable=identitytranstable, chars=chars):
|
||||
precondition(isinstance(s, six.binary_type), s)
|
||||
if s == '':
|
||||
def could_be_base32_encoded(s, s8=s8, tr=bytes.translate, identitytranstable=identitytranstable, chars=chars):
|
||||
precondition(isinstance(s, bytes), s)
|
||||
if s == b'':
|
||||
return True
|
||||
return s8[len(s)%8][ord(s[-1])] and not tr(s, identitytranstable, chars)
|
||||
|
||||
def could_be_base32_encoded_l(s, lengthinbits, s5=s5, tr=string.translate, identitytranstable=identitytranstable, chars=chars):
|
||||
precondition(isinstance(s, six.binary_type), s)
|
||||
if s == '':
|
||||
return True
|
||||
assert lengthinbits%5 < len(s5), lengthinbits
|
||||
assert ord(s[-1]) < s5[lengthinbits%5]
|
||||
return (((lengthinbits+4)/5) == len(s)) and s5[lengthinbits%5][ord(s[-1])] and not string.translate(s, identitytranstable, chars)
|
||||
|
||||
def num_octets_that_encode_to_this_many_quintets(numqs):
|
||||
# Here is a computation that conveniently expresses this:
|
||||
return (numqs*5+3)/8
|
||||
s = bytes(s) # On Python 2, make sure we're using modern bytes
|
||||
return s8[len(s)%8][s[-1]] and not tr(s, identitytranstable, chars)
|
||||
|
||||
def a2b(cs):
    """
    @param cs the base-32 encoded data (as bytes)

    @return the data encoded in cs, as decoded by base64.b32decode()

    Both preconditions below must hold: cs must pass
    could_be_base32_encoded() and must be a bytes instance.
    """
    precondition(could_be_base32_encoded(cs), "cs is required to be possibly base32 encoded data.", cs=cs)
    precondition(isinstance(cs, bytes), cs)

    cs = cs.upper()
    # Add padding back, to make Python's base64 module happy. Padding to a
    # multiple of 8 characters is the same condition as (len(cs) * 5) % 8 == 0.
    cs += b"=" * (-len(cs) % 8)
    return base64.b32decode(cs)
|
||||
|
||||
|
||||
__all__ = ["b2a", "a2b", "b2a_or_none", "BASE32CHAR_3bits", "BASE32CHAR_1bits", "BASE32CHAR", "BASE32STR_anybytes", "could_be_base32_encoded"]
|
||||
|
@ -1,22 +1,43 @@
|
||||
# from the Python Standard Library
|
||||
import string
|
||||
"""
|
||||
Base62 encoding.
|
||||
|
||||
Ported to Python 3.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from future.utils import PY2
|
||||
if PY2:
|
||||
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min # noqa: F401
|
||||
|
||||
# Pick the bytes translation helpers for the running interpreter: the
# ``string`` module functions on Python 2, the ``bytes`` methods otherwise.
if PY2:
    import string
    maketrans = string.maketrans
    translate = string.translate
else:
    maketrans = bytes.maketrans
    translate = bytes.translate
|
||||
|
||||
from past.builtins import chr as byteschr
|
||||
|
||||
from allmydata.util.mathutil import log_ceil, log_floor
|
||||
|
||||
# The base-62 alphabet, as bytes: digits, then uppercase, then lowercase.
chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

# Regular-expression character class matching one base-62 character.
BASE62CHAR = b'[' + chars + b']'

# The 62 byte values 0..61, one per alphabet character.
vals = b''.join([byteschr(i) for i in range(62)])
# Translation tables: alphabet character -> value, value -> alphabet
# character, and alphabet character -> itself.
c2vtranstable = maketrans(chars, vals)
v2ctranstable = maketrans(vals, chars)
identitytranstable = maketrans(chars, chars)
|
||||
|
||||
def b2a(os):
|
||||
"""
|
||||
@param os the data to be encoded (a string)
|
||||
@param os the data to be encoded (as bytes)
|
||||
|
||||
@return the contents of os in base-62 encoded form
|
||||
@return the contents of os in base-62 encoded form, as bytes
|
||||
"""
|
||||
cs = b2a_l(os, len(os)*8)
|
||||
assert num_octets_that_encode_to_this_many_chars(len(cs)) == len(os), "%s != %s, numchars: %s" % (num_octets_that_encode_to_this_many_chars(len(cs)), len(os), len(cs))
|
||||
@ -24,7 +45,7 @@ def b2a(os):
|
||||
|
||||
def b2a_l(os, lengthinbits):
|
||||
"""
|
||||
@param os the data to be encoded (a string)
|
||||
@param os the data to be encoded (as bytes)
|
||||
@param lengthinbits the number of bits of data in os to be encoded
|
||||
|
||||
b2a_l() will generate a base-62 encoded string big enough to encode
|
||||
@ -45,9 +66,11 @@ def b2a_l(os, lengthinbits):
|
||||
bits to encode that is not a multiple of 8, b2a() can sometimes generate a base-62 encoded
|
||||
string that is one or two characters longer than necessary.
|
||||
|
||||
@return the contents of os in base-62 encoded form
|
||||
@return the contents of os in base-62 encoded form, as bytes
|
||||
"""
|
||||
os = [ord(o) for o in reversed(os)] # treat os as big-endian -- and we want to process the least-significant o first
|
||||
# We call bytes() again for Python 2, to ensure literals are using future's
|
||||
# Python 3-compatible variant.
|
||||
os = [o for o in reversed(bytes(os))] # treat os as big-endian -- and we want to process the least-significant o first
|
||||
|
||||
value = 0
|
||||
numvalues = 1 # the number of possible values that value could be
|
||||
@ -62,7 +85,7 @@ def b2a_l(os, lengthinbits):
|
||||
value //= 62
|
||||
numvalues //= 62
|
||||
|
||||
return string.translate(''.join([chr(c) for c in reversed(chars)]), v2ctranstable) # make it big-endian
|
||||
return translate(bytes([c for c in reversed(chars)]), v2ctranstable) # make it big-endian
|
||||
|
||||
def num_octets_that_encode_to_this_many_chars(numcs):
    """
    @param numcs the number of base-62 characters

    @return log_floor(62**numcs, 256)

    NOTE(review): presumably this is the number of whole octets that numcs
    base-62 characters can represent -- confirm against mathutil.log_floor().
    """
    return log_floor(62**numcs, 256)
|
||||
@ -89,9 +112,11 @@ def a2b_l(cs, lengthinbits):
|
||||
Please see the warning in the docstring of b2a_l() regarding the use of
|
||||
b2a() versus b2a_l().
|
||||
|
||||
@return the data encoded in cs
|
||||
@return the data encoded in cs, as bytes
|
||||
"""
|
||||
cs = [ord(c) for c in reversed(string.translate(cs, c2vtranstable))] # treat cs as big-endian -- and we want to process the least-significant c first
|
||||
# We call bytes() again for Python 2, to ensure literals are using future's
|
||||
# Python 3-compatible variant.
|
||||
cs = [c for c in reversed(bytes(translate(cs, c2vtranstable)))] # treat cs as big-endian -- and we want to process the least-significant c first
|
||||
|
||||
value = 0
|
||||
numvalues = 1 # the number of possible values that value could be
|
||||
@ -101,10 +126,10 @@ def a2b_l(cs, lengthinbits):
|
||||
numvalues *= 62
|
||||
|
||||
numvalues = 2**lengthinbits
|
||||
bytes = []
|
||||
result_bytes = []
|
||||
while numvalues > 1:
|
||||
bytes.append(value % 256)
|
||||
result_bytes.append(value % 256)
|
||||
value //= 256
|
||||
numvalues //= 256
|
||||
|
||||
return ''.join([chr(b) for b in reversed(bytes)]) # make it big-endian
|
||||
return bytes([b for b in reversed(result_bytes)]) # make it big-endian
|
||||
|
Loading…
x
Reference in New Issue
Block a user