Merge remote-tracking branch 'origin/master' into 3358.fileutil-to-python-3

with fijal
2025-04-05 09:59:24 +00:00 · 2020-07-30 13:38:47 -04:00 · 2020-07-30 13:38:47 -04:00 · 27b6737eaf
commit 27b6737eaf
parent ecb3ee023d 9e4eda6912
23 changed files with 1147 additions and 943 deletions
--- a/misc/python3/ratchet-passing
+++ b/misc/python3/ratchet-passing
@ -28,6 +28,8 @@ allmydata.test.test_deferredutil.DeferredUtilTests.test_failure
 allmydata.test.test_deferredutil.DeferredUtilTests.test_gather_results
 allmydata.test.test_deferredutil.DeferredUtilTests.test_success
 allmydata.test.test_deferredutil.DeferredUtilTests.test_wait_for_delayed_calls
+allmydata.test.test_dictutil.DictUtil.test_auxdict
+allmydata.test.test_dictutil.DictUtil.test_dict_of_sets
 allmydata.test.test_hashtree.Complete.test_create
 allmydata.test.test_hashtree.Complete.test_dump
 allmydata.test.test_hashtree.Complete.test_needed_hashes
@ -44,6 +46,14 @@ allmydata.test.test_hashutil.HashUtilTests.test_sha256d
 allmydata.test.test_hashutil.HashUtilTests.test_sha256d_truncated
 allmydata.test.test_hashutil.HashUtilTests.test_timing_safe_compare
 allmydata.test.test_humanreadable.HumanReadable.test_repr
+allmydata.test.test_iputil.ListAddresses.test_get_local_ip_for
+allmydata.test.test_iputil.ListAddresses.test_list_async
+allmydata.test.test_iputil.ListAddresses.test_list_async_mock_cygwin
+allmydata.test.test_iputil.ListAddresses.test_list_async_mock_ifconfig
+allmydata.test.test_iputil.ListAddresses.test_list_async_mock_ip_addr
+allmydata.test.test_iputil.ListAddresses.test_list_async_mock_route
+allmydata.test.test_iputil.ListenOnUsed.test_random_port
+allmydata.test.test_iputil.ListenOnUsed.test_specific_port
 allmydata.test.test_netstring.Netstring.test_encode
 allmydata.test.test_netstring.Netstring.test_extra
 allmydata.test.test_netstring.Netstring.test_nested
@ -58,6 +68,23 @@ allmydata.test.test_pipeline.Pipeline.test_errors2
 allmydata.test.test_python3.Python3PortingEffortTests.test_finished_porting
 allmydata.test.test_python3.Python3PortingEffortTests.test_ported_modules_distinct
 allmydata.test.test_python3.Python3PortingEffortTests.test_ported_modules_exist
+allmydata.test.test_spans.ByteSpans.test_basic
+allmydata.test.test_spans.ByteSpans.test_large
+allmydata.test.test_spans.ByteSpans.test_math
+allmydata.test.test_spans.ByteSpans.test_overlap
+allmydata.test.test_spans.ByteSpans.test_random
+allmydata.test.test_spans.StringSpans.test_basic
+allmydata.test.test_spans.StringSpans.test_random
+allmydata.test.test_spans.StringSpans.test_test
+allmydata.test.test_statistics.Statistics.test_binomial_coeff
+allmydata.test.test_statistics.Statistics.test_binomial_distribution_pmf
+allmydata.test.test_statistics.Statistics.test_convolve
+allmydata.test.test_statistics.Statistics.test_find_k
+allmydata.test.test_statistics.Statistics.test_pr_backup_file_loss
+allmydata.test.test_statistics.Statistics.test_pr_file_loss
+allmydata.test.test_statistics.Statistics.test_repair_cost
+allmydata.test.test_statistics.Statistics.test_repair_count_pmf
+allmydata.test.test_statistics.Statistics.test_survival_pmf
 allmydata.test.test_time_format.TimeFormat.test_epoch
 allmydata.test.test_time_format.TimeFormat.test_epoch_in_London
 allmydata.test.test_time_format.TimeFormat.test_format_delta
@ -66,3 +93,13 @@ allmydata.test.test_time_format.TimeFormat.test_format_time_y2038
 allmydata.test.test_time_format.TimeFormat.test_iso_utc
 allmydata.test.test_time_format.TimeFormat.test_parse_date
 allmydata.test.test_time_format.TimeFormat.test_parse_duration
+allmydata.test.test_version.CheckRequirement.test_cross_check
+allmydata.test.test_version.CheckRequirement.test_cross_check_unparseable_versions
+allmydata.test.test_version.CheckRequirement.test_extract_openssl_version
+allmydata.test.test_version.CheckRequirement.test_packages_from_pkg_resources
+allmydata.test.test_version.T.test_report_import_error
+allmydata.test.test_version.VersionTestCase.test_basic_versions
+allmydata.test.test_version.VersionTestCase.test_comparison
+allmydata.test.test_version.VersionTestCase.test_from_parts
+allmydata.test.test_version.VersionTestCase.test_irrational_versions
+allmydata.test.test_version.VersionTestCase.test_suggest_normalized_version
--- a/newsfragments/3351.minor
+++ b/newsfragments/3351.minor
--- a/newsfragments/3356.minor
+++ b/newsfragments/3356.minor
--- a/newsfragments/3357.minor
+++ b/newsfragments/3357.minor
@ -0,0 +1 @@
+
--- a/newsfragments/3359.minor
+++ b/newsfragments/3359.minor
--- a/nix/tahoe-lafs.nix
+++ b/nix/tahoe-lafs.nix
@ -4,7 +4,7 @@
 , setuptools, setuptoolsTrial, pyasn1, zope_interface
 , service-identity, pyyaml, magic-wormhole, treq, appdirs
 , beautifulsoup4, eliot, autobahn, cryptography
-, html5lib, pyutil
+, html5lib, pyutil, distro
 }:
 python.pkgs.buildPythonPackage rec {
  version = "1.14.0.dev";
@ -50,7 +50,7 @@ python.pkgs.buildPythonPackage rec {
    setuptoolsTrial pyasn1 zope_interface
    service-identity pyyaml magic-wormhole treq
    eliot autobahn cryptography setuptools
-    future pyutil
+    future pyutil distro
  ];

  checkInputs = with python.pkgs; [
@ -59,6 +59,7 @@ python.pkgs.buildPythonPackage rec {
    fixtures
    beautifulsoup4
    html5lib
+    tenacity
  ];

  checkPhase = ''
--- a/setup.py
+++ b/setup.py
@ -127,6 +127,9 @@ install_requires = [

    # Utility code:
    "pyutil >= 3.3.0",
+
+    # Linux distribution detection:
+    "distro >= 1.4.0",
 ]

 setup_requires = [
@ -387,6 +390,7 @@ setup(name="tahoe-lafs", # also set in __init__.py
              "beautifulsoup4",
              "html5lib",
              "junitxml",
+              "tenacity",
          ] + tor_requires + i2p_requires,
          "tor": tor_requires,
          "i2p": i2p_requires,
--- a/src/allmydata/test/common_py3.py
+++ b/src/allmydata/test/common_py3.py
@ -15,6 +15,9 @@ if PY2:

 import os
 import time
+import signal
+
+from twisted.internet import reactor


 class TimezoneMixin(object):
@ -40,3 +43,25 @@ class TimezoneMixin(object):

    def have_working_tzset(self):
        return hasattr(time, 'tzset')
+
+
+class SignalMixin(object):
+    """
+    Test mixin that installs the reactor's SIGCHLD handler around each test.
+
+    This class is necessary for any code which wants to use Processes
+    outside the usual reactor.run() environment. It is copied from
+    Twisted's twisted.test.test_process . Note that Twisted-8.2.0 uses
+    something rather different.
+    """
+    # Handler that was in place before setUp() replaced it; stays None when
+    # setUp() did not install anything.
+    sigchldHandler = None
+
+    def setUp(self):
+        # make sure SIGCHLD handler is installed, as it should be on
+        # reactor.run(). problem is reactor may not have been run when this
+        # test runs.
+        if hasattr(reactor, "_handleSigchld") and hasattr(signal, "SIGCHLD"):
+            self.sigchldHandler = signal.signal(signal.SIGCHLD,
+                                                reactor._handleSigchld)
+        return super(SignalMixin, self).setUp()
+
+    def tearDown(self):
+        # Compare against None instead of testing truthiness: the handler
+        # returned by signal.signal() may be signal.SIG_DFL, whose value is 0
+        # and therefore falsy, in which case a plain "if" would never put the
+        # previous handler back.
+        if self.sigchldHandler is not None:
+            signal.signal(signal.SIGCHLD, self.sigchldHandler)
+        return super(SignalMixin, self).tearDown()
--- a/src/allmydata/test/common_util.py
+++ b/src/allmydata/test/common_util.py
@ -1,7 +1,6 @@
 from __future__ import print_function

 import os, signal
-from future.utils import PY2
 from random import randrange
 from six.moves import StringIO

@ -13,10 +12,11 @@ from ..util.assertutil import precondition
 from allmydata.util.encodingutil import (unicode_platform, get_filesystem_encoding,

                                         get_io_encoding)
-
+from future.utils import PY2
 if PY2: # XXX this is a hack that makes some tests pass on Python3, remove
        # in the future
    from ..scripts import runner
+from .common_py3 import SignalMixin

 def skip_if_cannot_represent_filename(u):
    precondition(isinstance(u, unicode))
@ -93,27 +93,6 @@ class ReallyEqualMixin(object):
        self.assertEqual(type(a), type(b), "a :: %r, b :: %r, %r" % (a, b, msg))


-class SignalMixin(object):
-    # This class is necessary for any code which wants to use Processes
-    # outside the usual reactor.run() environment. It is copied from
-    # Twisted's twisted.test.test_process . Note that Twisted-8.2.0 uses
-    # something rather different.
-    sigchldHandler = None
-
-    def setUp(self):
-        # make sure SIGCHLD handler is installed, as it should be on
-        # reactor.run(). problem is reactor may not have been run when this
-        # test runs.
-        if hasattr(reactor, "_handleSigchld") and hasattr(signal, "SIGCHLD"):
-            self.sigchldHandler = signal.signal(signal.SIGCHLD,
-                                                reactor._handleSigchld)
-        return super(SignalMixin, self).setUp()
-
-    def tearDown(self):
-        if self.sigchldHandler:
-            signal.signal(signal.SIGCHLD, self.sigchldHandler)
-        return super(SignalMixin, self).tearDown()
-
 class StallMixin(object):
    def stall(self, res=None, delay=1):
        d = defer.Deferred()
--- a/src/allmydata/test/test_dictutil.py
+++ b/src/allmydata/test/test_dictutil.py
@ -0,0 +1,90 @@
+"""
+Tests for allmydata.util.dictutil.
+
+Ported to Python 3.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401
+
+from twisted.trial import unittest
+
+from allmydata.util import dictutil
+
+
+class DictUtil(unittest.TestCase):
+    def test_dict_of_sets(self):
+        ds = dictutil.DictOfSets()
+        ds.add(1, "a")
+        ds.add(2, "b")
+        ds.add(2, "b")
+        ds.add(2, "c")
+        self.failUnlessEqual(ds[1], set(["a"]))
+        self.failUnlessEqual(ds[2], set(["b", "c"]))
+        ds.discard(3, "d") # should not raise an exception
+        ds.discard(2, "b")
+        self.failUnlessEqual(ds[2], set(["c"]))
+        ds.discard(2, "c")
+        self.failIf(2 in ds)
+
+        ds.add(3, "f")
+        ds2 = dictutil.DictOfSets()
+        ds2.add(3, "f")
+        ds2.add(3, "g")
+        ds2.add(4, "h")
+        ds.update(ds2)
+        self.failUnlessEqual(ds[1], set(["a"]))
+        self.failUnlessEqual(ds[3], set(["f", "g"]))
+        self.failUnlessEqual(ds[4], set(["h"]))
+
+    def test_auxdict(self):
+        d = dictutil.AuxValueDict()
+        # we put the serialized form in the auxdata
+        d.set_with_aux("key", ("filecap", "metadata"), "serialized")
+
+        self.failUnlessEqual(list(d.keys()), ["key"])
+        self.failUnlessEqual(d["key"], ("filecap", "metadata"))
+        self.failUnlessEqual(d.get_aux("key"), "serialized")
+        def _get_missing(key):
+            return d[key]
+        self.failUnlessRaises(KeyError, _get_missing, "nonkey")
+        self.failUnlessEqual(d.get("nonkey"), None)
+        self.failUnlessEqual(d.get("nonkey", "nonvalue"), "nonvalue")
+        self.failUnlessEqual(d.get_aux("nonkey"), None)
+        self.failUnlessEqual(d.get_aux("nonkey", "nonvalue"), "nonvalue")
+
+        d["key"] = ("filecap2", "metadata2")
+        self.failUnlessEqual(d["key"], ("filecap2", "metadata2"))
+        self.failUnlessEqual(d.get_aux("key"), None)
+
+        d.set_with_aux("key2", "value2", "aux2")
+        self.failUnlessEqual(sorted(d.keys()), ["key", "key2"])
+        del d["key2"]
+        self.failUnlessEqual(list(d.keys()), ["key"])
+        self.failIf("key2" in d)
+        self.failUnlessRaises(KeyError, _get_missing, "key2")
+        self.failUnlessEqual(d.get("key2"), None)
+        self.failUnlessEqual(d.get_aux("key2"), None)
+        d["key2"] = "newvalue2"
+        self.failUnlessEqual(d.get("key2"), "newvalue2")
+        self.failUnlessEqual(d.get_aux("key2"), None)
+
+        d = dictutil.AuxValueDict({1:2,3:4})
+        self.failUnlessEqual(sorted(d.keys()), [1,3])
+        self.failUnlessEqual(d[1], 2)
+        self.failUnlessEqual(d.get_aux(1), None)
+
+        d = dictutil.AuxValueDict([ (1,2), (3,4) ])
+        self.failUnlessEqual(sorted(d.keys()), [1,3])
+        self.failUnlessEqual(d[1], 2)
+        self.failUnlessEqual(d.get_aux(1), None)
+
+        d = dictutil.AuxValueDict(one=1, two=2)
+        self.failUnlessEqual(sorted(d.keys()), ["one","two"])
+        self.failUnlessEqual(d["one"], 1)
+        self.failUnlessEqual(d.get_aux("one"), None)
--- a/src/allmydata/test/test_iputil.py
+++ b/src/allmydata/test/test_iputil.py
@ -1,16 +1,36 @@
+"""
+Tests for allmydata.util.iputil.

-import re, errno, subprocess, os
+Ported to Python 3.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2, native_str
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401
+
+import re, errno, subprocess, os, socket

 from twisted.trial import unittest

+from tenacity import retry, stop_after_attempt
+
+from foolscap.api import Tub
+
 from allmydata.util import iputil
-import allmydata.test.common_util as testutil
+import allmydata.test.common_py3 as testutil
 from allmydata.util.namespace import Namespace


-DOTTED_QUAD_RE=re.compile("^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$")
+DOTTED_QUAD_RE=re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$")

-MOCK_IPADDR_OUTPUT = """\
+# Mock output from subprocesses should be bytes, that's what happens on both
+# Python 2 and Python 3:
+MOCK_IPADDR_OUTPUT = b"""\
 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN \n\
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
@ -28,7 +48,7 @@ MOCK_IPADDR_OUTPUT = """\
       valid_lft forever preferred_lft forever
 """

-MOCK_IFCONFIG_OUTPUT = """\
+MOCK_IFCONFIG_OUTPUT = b"""\
 eth1      Link encap:Ethernet  HWaddr d4:3d:7e:01:b4:3e  \n\
          inet addr:192.168.0.6  Bcast:192.168.0.255  Mask:255.255.255.0
          inet6 addr: fe80::d63d:7eff:fe01:b43e/64 Scope:Link
@ -59,7 +79,7 @@ wlan0     Link encap:Ethernet  HWaddr 90:f6:52:27:15:0a  \n\
 """

 # This is actually from a VirtualBox VM running XP.
-MOCK_ROUTE_OUTPUT = """\
+MOCK_ROUTE_OUTPUT = b"""\
 ===========================================================================
 Interface List
 0x1 ........................... MS TCP Loopback interface
@ -98,6 +118,11 @@ class ListAddresses(testutil.SignalMixin, unittest.TestCase):
    def test_get_local_ip_for(self):
        addr = iputil.get_local_ip_for('127.0.0.1')
        self.failUnless(DOTTED_QUAD_RE.match(addr))
+        # Bytes can be taken as input:
+        bytes_addr = iputil.get_local_ip_for(b'127.0.0.1')
+        self.assertEqual(addr, bytes_addr)
+        # The output is a native string:
+        self.assertIsInstance(addr, native_str)

    def test_list_async(self):
        d = iputil.get_local_addresses_async()
@ -162,3 +187,44 @@ class ListAddresses(testutil.SignalMixin, unittest.TestCase):
    def test_list_async_mock_cygwin(self):
        self.patch(iputil, 'platform', "cygwin")
        return self._test_list_async_mock(None, None, CYGWIN_TEST_ADDRESSES)
+
+
+class ListenOnUsed(unittest.TestCase):
+    """Tests for listenOnUnused."""
+
+    def create_tub(self, basedir):
+        """
+        Create and start a Tub whose certificate file lives in ``basedir``.
+
+        The Tub's stopService is registered as a cleanup for the test.
+
+        :param basedir: directory for the Tub's ``tub.pem``; created if it
+            does not exist yet.
+
+        :returns: the started Tub
+        """
+        # The tests below are wrapped in @retry, so this can be called again
+        # with a directory left behind by a failed attempt. os.makedirs()
+        # raises when the directory already exists, which would make every
+        # retry fail before it reaches the code under test.
+        if not os.path.isdir(basedir):
+            os.makedirs(basedir)
+        tubfile = os.path.join(basedir, "tub.pem")
+        tub = Tub(certFile=tubfile)
+        tub.setOption("expose-remote-exception-types", False)
+        tub.startService()
+        self.addCleanup(tub.stopService)
+        return tub
+
+    @retry(stop=stop_after_attempt(7))
+    def test_random_port(self):
+        """A random port is selected if none is given."""
+        tub = self.create_tub("utils/ListenOnUsed/test_randomport")
+        self.assertEqual(len(tub.getListeners()), 0)
+        portnum = iputil.listenOnUnused(tub)
+        # We can connect to this port:
+        s = socket.socket()
+        try:
+            s.connect(("127.0.0.1", portnum))
+        finally:
+            # Release the socket even when the connection attempt fails.
+            s.close()
+        self.assertEqual(len(tub.getListeners()), 1)
+
+        # Listen on another port:
+        tub2 = self.create_tub("utils/ListenOnUsed/test_randomport_2")
+        portnum2 = iputil.listenOnUnused(tub2)
+        self.assertNotEqual(portnum, portnum2)
+
+    @retry(stop=stop_after_attempt(7))
+    def test_specific_port(self):
+        """The given port is used."""
+        tub = self.create_tub("utils/ListenOnUsed/test_givenport")
+        # Bind to port 0 to learn a port number, then close the socket so
+        # that listenOnUnused can be asked to listen on that same number.
+        s = socket.socket()
+        try:
+            s.bind(("127.0.0.1", 0))
+            port = s.getsockname()[1]
+        finally:
+            s.close()
+        port2 = iputil.listenOnUnused(tub, port)
+        self.assertEqual(port, port2)
--- a/src/allmydata/test/test_spans.py
+++ b/src/allmydata/test/test_spans.py
@ -0,0 +1,617 @@
+"""
+Tests for allmydata.util.spans.
+"""
+
+from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401
+
+from past.builtins import long
+
+import binascii
+import hashlib
+
+from twisted.trial import unittest
+
+from allmydata.util.spans import Spans, overlap, DataSpans
+
+
+def sha256(data):
+    """
+    Hash ``data`` with SHA-256 and hex-encode the digest.
+
+    :param bytes data: data to hash
+
+    :returns: the hex-encoded SHA256 digest, as produced by
+        ``binascii.hexlify``
+    """
+    raw_digest = hashlib.sha256(data).digest()
+    return binascii.hexlify(raw_digest)
+
+
+class SimpleSpans(object):
+    # this is a simple+inefficient form of util.spans.Spans . We compare the
+    # behavior of this reference model against the real (efficient) form.
+    #
+    # Membership is stored as a plain set of individual integer offsets in
+    # self._have.
+
+    def __init__(self, _span_or_start=None, length=None):
+        """
+        Build a span set in one of three ways:
+
+        - no arguments: empty
+        - (start, length): the offsets start .. start+length-1
+        - an iterable of (start, length) pairs: the union of those spans
+        """
+        self._have = set()
+        if length is not None:
+            for i in range(_span_or_start, _span_or_start+length):
+                self._have.add(i)
+        elif _span_or_start:
+            # a falsy argument (None, or an empty span set) adds nothing
+            for (start,length) in _span_or_start:
+                self.add(start, length)
+
+    def add(self, start, length):
+        """Add offsets start .. start+length-1; returns self for chaining."""
+        for i in range(start, start+length):
+            self._have.add(i)
+        return self
+
+    def remove(self, start, length):
+        """Discard offsets start .. start+length-1; returns self for chaining."""
+        for i in range(start, start+length):
+            self._have.discard(i)
+        return self
+
+    def each(self):
+        """Return every member offset as a sorted list."""
+        return sorted(self._have)
+
+    def __iter__(self):
+        """Yield one (start, length) tuple per run of consecutive offsets."""
+        items = sorted(self._have)
+        prevstart = None
+        prevend = None
+        for i in items:
+            if prevstart is None:
+                # first offset: open the first run
+                prevstart = prevend = i
+                continue
+            if i == prevend+1:
+                # still contiguous: extend the current run
+                prevend = i
+                continue
+            # gap found: emit the finished run and start a new one at i
+            yield (prevstart, prevend-prevstart+1)
+            prevstart = prevend = i
+        if prevstart is not None:
+            # emit the final run, which the loop above never yields
+            yield (prevstart, prevend-prevstart+1)
+
+    def __bool__(self): # this gets us bool()
+        return bool(self.len())
+
+    def len(self):
+        """Return the number of member offsets."""
+        return len(self._have)
+
+    def __add__(self, other):
+        """Return a new object holding the union; self is not modified."""
+        s = self.__class__(self)
+        for (start, length) in other:
+            s.add(start, length)
+        return s
+
+    def __sub__(self, other):
+        """Return a new object holding self minus other; self is not modified."""
+        s = self.__class__(self)
+        for (start, length) in other:
+            s.remove(start, length)
+        return s
+
+    def __iadd__(self, other):
+        """Add every span of other to self in place."""
+        for (start, length) in other:
+            self.add(start, length)
+        return self
+
+    def __isub__(self, other):
+        """Remove every span of other from self in place."""
+        for (start, length) in other:
+            self.remove(start, length)
+        return self
+
+    def __and__(self, other):
+        """Return a new object holding the offsets present in both operands."""
+        s = self.__class__()
+        for i in other.each():
+            if i in self._have:
+                s.add(i, 1)
+        return s
+
+    def __contains__(self, start_and_length):
+        """True only if every offset of the (start, length) span is a member."""
+        (start, length) = start_and_length
+        for i in range(start, start+length):
+            if i not in self._have:
+                return False
+        return True
+
+class ByteSpans(unittest.TestCase):
+    def test_basic(self):
+        s = Spans()
+        self.failUnlessEqual(list(s), [])
+        self.failIf(s)
+        self.failIf((0,1) in s)
+        self.failUnlessEqual(s.len(), 0)
+
+        s1 = Spans(3, 4) # 3,4,5,6
+        self._check1(s1)
+
+        s1 = Spans(long(3), long(4)) # 3,4,5,6
+        self._check1(s1)
+
+        s2 = Spans(s1)
+        self._check1(s2)
+
+        s2.add(10,2) # 10,11
+        self._check1(s1)
+        self.failUnless((10,1) in s2)
+        self.failIf((10,1) in s1)
+        self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11])
+        self.failUnlessEqual(s2.len(), 6)
+
+        s2.add(15,2).add(20,2)
+        self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11,15,16,20,21])
+        self.failUnlessEqual(s2.len(), 10)
+
+        s2.remove(4,3).remove(15,1)
+        self.failUnlessEqual(list(s2.each()), [3,10,11,16,20,21])
+        self.failUnlessEqual(s2.len(), 6)
+
+        s1 = SimpleSpans(3, 4) # 3 4 5 6
+        s2 = SimpleSpans(5, 4) # 5 6 7 8
+        i = s1 & s2
+        self.failUnlessEqual(list(i.each()), [5, 6])
+
+    def _check1(self, s):
+        self.failUnlessEqual(list(s), [(3,4)])
+        self.failUnless(s)
+        self.failUnlessEqual(s.len(), 4)
+        self.failIf((0,1) in s)
+        self.failUnless((3,4) in s)
+        self.failUnless((3,1) in s)
+        self.failUnless((5,2) in s)
+        self.failUnless((6,1) in s)
+        self.failIf((6,2) in s)
+        self.failIf((7,1) in s)
+        self.failUnlessEqual(list(s.each()), [3,4,5,6])
+
+    def test_large(self):
+        s = Spans(4, 2**65) # don't do this with a SimpleSpans
+        self.failUnlessEqual(list(s), [(4, 2**65)])
+        self.failUnless(s)
+        self.failUnlessEqual(s.len(), 2**65)
+        self.failIf((0,1) in s)
+        self.failUnless((4,2) in s)
+        self.failUnless((2**65,2) in s)
+
+    def test_math(self):
+        s1 = Spans(0, 10) # 0,1,2,3,4,5,6,7,8,9
+        s2 = Spans(5, 3) # 5,6,7
+        s3 = Spans(8, 4) # 8,9,10,11
+
+        s = s1 - s2
+        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
+        s = s1 - s3
+        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
+        s = s2 - s3
+        self.failUnlessEqual(list(s.each()), [5,6,7])
+        s = s1 & s2
+        self.failUnlessEqual(list(s.each()), [5,6,7])
+        s = s2 & s1
+        self.failUnlessEqual(list(s.each()), [5,6,7])
+        s = s1 & s3
+        self.failUnlessEqual(list(s.each()), [8,9])
+        s = s3 & s1
+        self.failUnlessEqual(list(s.each()), [8,9])
+        s = s2 & s3
+        self.failUnlessEqual(list(s.each()), [])
+        s = s3 & s2
+        self.failUnlessEqual(list(s.each()), [])
+        s = Spans() & s3
+        self.failUnlessEqual(list(s.each()), [])
+        s = s3 & Spans()
+        self.failUnlessEqual(list(s.each()), [])
+
+        s = s1 + s2
+        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
+        s = s1 + s3
+        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
+        s = s2 + s3
+        self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
+
+        s = Spans(s1)
+        s -= s2
+        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
+        s = Spans(s1)
+        s -= s3
+        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
+        s = Spans(s2)
+        s -= s3
+        self.failUnlessEqual(list(s.each()), [5,6,7])
+
+        s = Spans(s1)
+        s += s2
+        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
+        s = Spans(s1)
+        s += s3
+        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
+        s = Spans(s2)
+        s += s3
+        self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
+
+    def test_random(self):
+        # attempt to increase coverage of corner cases by comparing behavior
+        # of a simple-but-slow model implementation against the
+        # complex-but-fast actual implementation, in a large number of random
+        # operations
+        S1 = SimpleSpans
+        S2 = Spans
+        s1 = S1(); s2 = S2()
+        seed = b""
+        def _create(subseed):
+            ns1 = S1(); ns2 = S2()
+            for i in range(10):
+                what = sha256(subseed+bytes(i))
+                start = int(what[2:4], 16)
+                length = max(1,int(what[5:6], 16))
+                ns1.add(start, length); ns2.add(start, length)
+            return ns1, ns2
+
+        #print
+        for i in range(1000):
+            what = sha256(seed+bytes(i))
+            op = what[0:1]
+            subop = what[1:2]
+            start = int(what[2:4], 16)
+            length = max(1,int(what[5:6], 16))
+            #print what
+            if op in b"0":
+                if subop in b"01234":
+                    s1 = S1(); s2 = S2()
+                elif subop in b"5678":
+                    s1 = S1(start, length); s2 = S2(start, length)
+                else:
+                    s1 = S1(s1); s2 = S2(s2)
+                #print "s2 = %s" % s2.dump()
+            elif op in b"123":
+                #print "s2.add(%d,%d)" % (start, length)
+                s1.add(start, length); s2.add(start, length)
+            elif op in b"456":
+                #print "s2.remove(%d,%d)" % (start, length)
+                s1.remove(start, length); s2.remove(start, length)
+            elif op in b"78":
+                ns1, ns2 = _create(what[7:11])
+                #print "s2 + %s" % ns2.dump()
+                s1 = s1 + ns1; s2 = s2 + ns2
+            elif op in b"9a":
+                ns1, ns2 = _create(what[7:11])
+                #print "%s - %s" % (s2.dump(), ns2.dump())
+                s1 = s1 - ns1; s2 = s2 - ns2
+            elif op in b"bc":
+                ns1, ns2 = _create(what[7:11])
+                #print "s2 += %s" % ns2.dump()
+                s1 += ns1; s2 += ns2
+            elif op in b"de":
+                ns1, ns2 = _create(what[7:11])
+                #print "%s -= %s" % (s2.dump(), ns2.dump())
+                s1 -= ns1; s2 -= ns2
+            else:
+                ns1, ns2 = _create(what[7:11])
+                #print "%s &= %s" % (s2.dump(), ns2.dump())
+                s1 = s1 & ns1; s2 = s2 & ns2
+            #print "s2 now %s" % s2.dump()
+            self.failUnlessEqual(list(s1.each()), list(s2.each()))
+            self.failUnlessEqual(s1.len(), s2.len())
+            self.failUnlessEqual(bool(s1), bool(s2))
+            self.failUnlessEqual(list(s1), list(s2))
+            for j in range(10):
+                what = sha256(what[12:14]+bytes(j))
+                start = int(what[2:4], 16)
+                length = max(1, int(what[5:6], 16))
+                span = (start, length)
+                self.failUnlessEqual(bool(span in s1), bool(span in s2))
+
+
+    # s()
+    # s(start,length)
+    # s(s0)
+    # s.add(start,length) : returns s
+    # s.remove(start,length)
+    # s.each() -> list of byte offsets, mostly for testing
+    # list(s) -> list of (start,length) tuples, one per span
+    # (start,length) in s -> True if (start..start+length-1) are all members
+    #  NOT equivalent to x in list(s)
+    # s.len() -> number of bytes, for testing, bool(), and accounting/limiting
+    # bool(s)  (__bool__, spelled __nonzero__ on Python 2)
+    # s = s1+s2, s1-s2, +=s1, -=s1
+
+    def test_overlap(self):
+        for a in range(20):
+            for b in range(10):
+                for c in range(20):
+                    for d in range(10):
+                        self._test_overlap(a,b,c,d)
+
+    def _test_overlap(self, a, b, c, d):
+        s1 = set(range(a,a+b))
+        s2 = set(range(c,c+d))
+        #print "---"
+        #self._show_overlap(s1, "1")
+        #self._show_overlap(s2, "2")
+        o = overlap(a,b,c,d)
+        expected = s1.intersection(s2)
+        if not expected:
+            self.failUnlessEqual(o, None)
+        else:
+            start,length = o
+            so = set(range(start,start+length))
+            #self._show(so, "o")
+            self.failUnlessEqual(so, expected)
+
+    def _show_overlap(self, s, c):
+        """
+        Debugging aid: write one line to stdout with ``c`` at every offset
+        that is a member of ``s`` and a space at every other offset.
+
+        :param s: a set of non-negative integer offsets
+        :param c: the text to write for each member offset
+        """
+        import sys
+        out = sys.stdout
+        if s:
+            # range() excludes its end point, so go one past max(s);
+            # otherwise the largest member would never be drawn.
+            for i in range(max(s) + 1):
+                if i in s:
+                    out.write(c)
+                else:
+                    out.write(" ")
+        out.write("\n")
+
+def extend(s, start, length, fill):
+    """
+    Return ``s`` padded on the right with ``fill`` so that it is at least
+    ``start+length`` long; ``s`` is returned unchanged when it is already
+    long enough. ``fill`` must have length 1 whenever padding is needed.
+    """
+    shortfall = start + length - len(s)
+    if shortfall <= 0:
+        return s
+    assert len(fill) == 1
+    padding = fill * shortfall
+    return s + padding
+
+def replace(s, start, data):
+    """
+    Return a copy of ``s`` in which the ``len(data)`` items beginning at
+    ``start`` are overwritten by ``data``. ``s`` must already be long enough
+    to hold the whole of ``data`` at that position.
+    """
+    end = start + len(data)
+    assert len(s) >= end
+    head = s[:start]
+    tail = s[end:]
+    return head + data + tail
+
+class SimpleDataSpans(object):
+    """
+    Simple model of a data-span container, kept as two parallel sequences
+    indexed by offset: ``missing`` records which offsets are held and
+    ``data`` holds the bytes themselves.
+    """
+    def __init__(self, other=None):
+        self.missing = "" # "1" where missing, "0" where found
+        self.data = b""
+        if other:
+            # copy every chunk reported by the other container
+            for (start, data) in other.get_chunks():
+                self.add(start, data)
+
+    def __bool__(self): # this gets us bool()
+        return bool(self.len())
+
+    def len(self):
+        """Return how many offsets are currently held (the "0" entries)."""
+        return len(self.missing.replace("1", ""))
+
+    def _dump(self):
+        """Return the list of offsets that are currently held."""
+        return [i for (i,c) in enumerate(self.missing) if c == "0"]
+
+    def _have(self, start, length):
+        """True only if all of start .. start+length-1 are held."""
+        m = self.missing[start:start+length]
+        # empty slice, slice running past the end, or any "1" in the slice
+        # (int(m) is nonzero as soon as one position is "1") => not all held
+        if not m or len(m)<length or int(m):
+            return False
+        return True
+    def get_chunks(self):
+        """Yield (offset, one-byte slice of data) for every held offset."""
+        for i in self._dump():
+            yield (i, self.data[i:i+1])
+    def get_spans(self):
+        """Return the held offsets as a SimpleSpans."""
+        return SimpleSpans([(start,len(data))
+                            for (start,data) in self.get_chunks()])
+    def get(self, start, length):
+        """Return the bytes for the range if it is fully held, else None."""
+        if self._have(start, length):
+            return self.data[start:start+length]
+        return None
+    def pop(self, start, length):
+        """Like get(), but also remove the range when data was returned."""
+        data = self.get(start, length)
+        if data:
+            self.remove(start, length)
+        return data
+    def remove(self, start, length):
+        """Mark start .. start+length-1 as missing, growing the map if needed."""
+        self.missing = replace(extend(self.missing, start, length, "1"),
+                               start, "1"*length)
+    def add(self, start, data):
+        """Store ``data`` at ``start`` and mark those offsets as held."""
+        self.missing = replace(extend(self.missing, start, len(data), "1"),
+                               start, "0"*len(data))
+        # offsets that had to be padded in self.data are filled with b" ";
+        # they stay marked "1" (missing) in self.missing
+        self.data = replace(extend(self.data, start, len(data), b" "),
+                            start, data)
+
+
+class StringSpans(unittest.TestCase):
+    """Run the same checks against SimpleDataSpans and DataSpans."""
+
+    def do_basic(self, klass):
+        """Run the add/get/pop/remove/copy sanity checks against `klass`.
+
+        :param klass: the spans class under test; it is called with no
+            arguments and with an existing instance (copy construction).
+        """
+        ds = klass()
+        self.failUnlessEqual(ds.len(), 0)
+        self.failUnlessEqual(list(ds._dump()), [])
+        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 0)
+        # The result is not inspected; this only checks that get_spans()
+        # can be called on an empty instance.
+        ds.get_spans()
+        self.failUnlessEqual(ds.get(0, 4), None)
+        self.failUnlessEqual(ds.pop(0, 4), None)
+        ds.remove(0, 4)
+
+        ds.add(2, b"four")
+        self.failUnlessEqual(ds.len(), 4)
+        self.failUnlessEqual(list(ds._dump()), [2,3,4,5])
+        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
+        s1 = ds.get_spans()
+        self.failUnless((2,2) in s1)
+        self.failUnlessEqual(ds.get(0, 4), None)
+        self.failUnlessEqual(ds.pop(0, 4), None)
+        self.failUnlessEqual(ds.get(4, 4), None)
+
+        # A copy must be independent: popping from ds2 leaves ds untouched.
+        ds2 = klass(ds)
+        self.failUnlessEqual(ds2.len(), 4)
+        self.failUnlessEqual(list(ds2._dump()), [2,3,4,5])
+        self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 4)
+        self.failUnlessEqual(ds2.get(0, 4), None)
+        self.failUnlessEqual(ds2.pop(0, 4), None)
+        self.failUnlessEqual(ds2.pop(2, 3), b"fou")
+        self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 1)
+        self.failUnlessEqual(ds2.get(2, 3), None)
+        self.failUnlessEqual(ds2.get(5, 1), b"r")
+        self.failUnlessEqual(ds.get(2, 3), b"fou")
+        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
+
+        ds.add(0, b"23")
+        self.failUnlessEqual(ds.len(), 6)
+        self.failUnlessEqual(list(ds._dump()), [0,1,2,3,4,5])
+        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 6)
+        self.failUnlessEqual(ds.get(0, 4), b"23fo")
+        self.failUnlessEqual(ds.pop(0, 4), b"23fo")
+        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 2)
+        self.failUnlessEqual(ds.get(0, 4), None)
+        self.failUnlessEqual(ds.pop(0, 4), None)
+
+        # Overlapping adds: the later data overwrites the earlier bytes.
+        ds = klass()
+        ds.add(2, b"four")
+        ds.add(3, b"ea")
+        self.failUnlessEqual(ds.get(2, 4), b"fear")
+
+        # Same again, with offsets and lengths passed as long().
+        ds = klass()
+        ds.add(long(2), b"four")
+        ds.add(long(3), b"ea")
+        self.failUnlessEqual(ds.get(long(2), long(4)), b"fear")
+
+
+    def do_scan(self, klass):
+        """Add every [start, end) span to a fixed baseline and verify `klass`.
+
+        For each added span this checks the span itself, the original
+        baseline pieces, short sub-spans, and the effect of removing short
+        sub-spans from a copy.
+
+        :param klass: the spans class under test.
+        """
+        # do a test with gaps and spans of size 1 and 2
+        #  left=(1,11) * right=(1,11) * gapsize=(1,2)
+        # 111, 112, 121, 122, 211, 212, 221, 222
+        #    211
+        #      121
+        #         112
+        #            212
+        #               222
+        #                   221
+        #                      111
+        #                        122
+        #  11 1  1 11 11  11  1 1  111
+        # 0123456789012345678901234567
+        # abcdefghijklmnopqrstuvwxyz-=
+        pieces = [(1, b"bc"),
+                  (4, b"e"),
+                  (7, b"h"),
+                  (9, b"jk"),
+                  (12, b"mn"),
+                  (16, b"qr"),
+                  (20, b"u"),
+                  (22, b"w"),
+                  (25, b"z-="),
+                  ]
+        p_elements = set([1,2,4,7,9,10,12,13,16,17,20,22,25,26,27])
+        S = b"abcdefghijklmnopqrstuvwxyz-="
+        # TODO: when adding data, add capital letters, to make sure we aren't
+        # just leaving the old data in place
+        size = len(S)
+        def base():
+            # Fresh instance pre-populated with the baseline pieces.
+            ds = klass()
+            for start, data in pieces:
+                ds.add(start, data)
+            return ds
+        def dump(s):
+            # Debug rendering: the bytes of S at offsets reported by _dump(),
+            # blanks everywhere else.
+            present = set(s._dump())
+            # Slice (S[i:i+1]) instead of indexing: indexing bytes yields an
+            # int on Python 3, which b"".join() rejects with a TypeError.
+            d = b"".join([S[i:i+1] if i in present else b" "
+                          for i in range(size)])
+            assert len(d) == size
+            return d
+        DEBUG = False
+        for start in range(0, size):
+            for end in range(start+1, size):
+                # add [start-end) to the baseline
+                which = "%d-%d" % (start, end-1)
+                p_added = set(range(start, end))
+                b = base()
+                if DEBUG:
+                    print()
+                    print(dump(b), which)
+                    add = klass()
+                    add.add(start, S[start:end])
+                    print(dump(add))
+                b.add(start, S[start:end])
+                if DEBUG:
+                    print(dump(b))
+                # check that the new span is there
+                d = b.get(start, end-start)
+                self.failUnlessEqual(d, S[start:end], which)
+                # check that all the original pieces are still there
+                for t_start, t_data in pieces:
+                    t_len = len(t_data)
+                    self.failUnlessEqual(b.get(t_start, t_len),
+                                         S[t_start:t_start+t_len],
+                                         "%s %d+%d" % (which, t_start, t_len))
+                # check that a lot of subspans are mostly correct
+                for t_start in range(size):
+                    for t_len in range(1,4):
+                        d = b.get(t_start, t_len)
+                        if d is not None:
+                            which2 = "%s+(%d-%d)" % (which, t_start,
+                                                     t_start+t_len-1)
+                            self.failUnlessEqual(d, S[t_start:t_start+t_len],
+                                                 which2)
+                        # check that removing a subspan gives the right value
+                        b2 = klass(b)
+                        b2.remove(t_start, t_len)
+                        removed = set(range(t_start, t_start+t_len))
+                        for i in range(size):
+                            exp = (((i in p_elements) or (i in p_added))
+                                   and (i not in removed))
+                            which2 = "%s-(%d-%d)" % (which, t_start,
+                                                     t_start+t_len-1)
+                            self.failUnlessEqual(bool(b2.get(i, 1)), exp,
+                                                 which2+" %d" % i)
+
+    def test_test(self):
+        """Run the shared checks against the SimpleDataSpans model itself."""
+        self.do_basic(SimpleDataSpans)
+        self.do_scan(SimpleDataSpans)
+
+    def test_basic(self):
+        """Run the shared checks against DataSpans."""
+        self.do_basic(DataSpans)
+        self.do_scan(DataSpans)
+
+    def test_random(self):
+        """Apply the same pseudo-random operations to both classes and compare."""
+        # attempt to increase coverage of corner cases by comparing behavior
+        # of a simple-but-slow model implementation against the
+        # complex-but-fast actual implementation, in a large number of random
+        # operations
+        S1 = SimpleDataSpans
+        S2 = DataSpans
+        s1 = S1()
+        s2 = S2()
+        seed = b""
+        def _randstr(length, seed):
+            # Concatenate sha256() outputs until `length` bytes are available.
+            # NOTE(review): on Python 3, bytes(n) for an int n is n zero
+            # bytes, not its decimal text; the suffix still varies
+            # deterministically with `created` -- confirm that is intended.
+            created = 0
+            pieces = []
+            while created < length:
+                piece = sha256(seed + bytes(created))
+                pieces.append(piece)
+                created += len(piece)
+            return b"".join(pieces)[:length]
+        def _create(subseed):
+            # Build a matching (model, real) pair holding the same ten spans.
+            ns1 = S1()
+            ns2 = S2()
+            for i in range(10):
+                what = sha256(subseed+bytes(i))
+                start = int(what[2:4], 16)
+                length = max(1,int(what[5:6], 16))
+                ns1.add(start, _randstr(length, what[7:9]))
+                ns2.add(start, _randstr(length, what[7:9]))
+            return ns1, ns2
+
+        #print
+        for i in range(1000):
+            what = sha256(seed+bytes(i))
+            op = what[0:1]
+            subop = what[1:2]
+            start = int(what[2:4], 16)
+            length = max(1,int(what[5:6], 16))
+            #print what
+            if op in b"0":
+                if subop in b"0123456":
+                    s1 = S1()
+                    s2 = S2()
+                else:
+                    s1, s2 = _create(what[7:11])
+                #print "s2 = %s" % list(s2._dump())
+            elif op in b"123456":
+                #print "s2.add(%d,%d)" % (start, length)
+                s1.add(start, _randstr(length, what[7:9]))
+                s2.add(start, _randstr(length, what[7:9]))
+            elif op in b"789abc":
+                #print "s2.remove(%d,%d)" % (start, length)
+                s1.remove(start, length)
+                s2.remove(start, length)
+            else:
+                #print "s2.pop(%d,%d)" % (start, length)
+                d1 = s1.pop(start, length)
+                d2 = s2.pop(start, length)
+                self.failUnlessEqual(d1, d2)
+            #print "s1 now %s" % list(s1._dump())
+            #print "s2 now %s" % list(s2._dump())
+            self.failUnlessEqual(s1.len(), s2.len())
+            self.failUnlessEqual(list(s1._dump()), list(s2._dump()))
+            # Probe both objects with the same 100 reads after every step.
+            for j in range(100):
+                what = sha256(what[12:14]+bytes(j))
+                start = int(what[2:4], 16)
+                length = max(1, int(what[5:6], 16))
+                d1 = s1.get(start, length)
+                d2 = s2.get(start, length)
+                self.failUnlessEqual(d1, d2, "%d+%d" % (start, length))
--- a/src/allmydata/test/test_statistics.py
+++ b/src/allmydata/test/test_statistics.py
@ -0,0 +1,158 @@
+"""
+Tests for allmydata.util.statistics.
+
+Ported to Python 3.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401
+
+from six.moves import StringIO  # native string StringIO
+
+from twisted.trial import unittest
+
+from allmydata.util import statistics
+
+
+class Statistics(unittest.TestCase):
+    """Tests for the functions in allmydata.util.statistics."""
+
+    def should_assert(self, msg, func, *args, **kwargs):
+        """Fail with `msg` unless func(*args, **kwargs) raises AssertionError.
+
+        self.fail() reports failure by raising an AssertionError subclass, so
+        it must be called outside the try block; inside it, the handler
+        below would swallow the failure and this check could never fail.
+        """
+        try:
+            func(*args, **kwargs)
+        except AssertionError:
+            return
+        self.fail(msg)
+
+    def failUnlessListEqual(self, a, b, msg = None):
+        """Assert that `a` and `b` have the same length and equal items."""
+        self.failUnlessEqual(len(a), len(b))
+        for item_a, item_b in zip(a, b):
+            self.failUnlessEqual(item_a, item_b, msg)
+
+    def failUnlessListAlmostEqual(self, a, b, places = 7, msg = None):
+        """Assert equal length and item-wise equality to `places` places."""
+        self.failUnlessEqual(len(a), len(b))
+        for item_a, item_b in zip(a, b):
+            self.failUnlessAlmostEqual(item_a, item_b, places, msg)
+
+    def test_binomial_coeff(self):
+        """Check known values, symmetry, and the n < k assertion."""
+        f = statistics.binomial_coeff
+        self.failUnlessEqual(f(20, 0), 1)
+        self.failUnlessEqual(f(20, 1), 20)
+        self.failUnlessEqual(f(20, 2), 190)
+        self.failUnlessEqual(f(20, 8), f(20, 12))
+        self.should_assert("Should assert if n < k", f, 2, 3)
+        self.assertEqual(f(5, 3), f(5, 2))
+
+    def test_binomial_distribution_pmf(self):
+        """Check a small PMF, its argument assertions, and print_pmf output."""
+        f = statistics.binomial_distribution_pmf
+
+        pmf_comp = f(2, .1)
+        pmf_stat = [0.81, 0.18, 0.01]
+        self.failUnlessListAlmostEqual(pmf_comp, pmf_stat)
+
+        # Summing across a PMF should give the total probability 1
+        self.failUnlessAlmostEqual(sum(pmf_comp), 1)
+        self.should_assert("Should assert if not 0<=p<=1", f, 1, -1)
+        self.should_assert("Should assert if n < 1", f, 0, .1)
+
+        out = StringIO()
+        statistics.print_pmf(pmf_comp, out=out)
+        lines = out.getvalue().splitlines()
+        self.failUnlessEqual(lines[0], "i=0: 0.81")
+        self.failUnlessEqual(lines[1], "i=1: 0.18")
+        self.failUnlessEqual(lines[2], "i=2: 0.01")
+
+    def test_survival_pmf(self):
+        """Cross-check the two survival PMF methods and the range assertions."""
+        f = statistics.survival_pmf
+        # Cross-check binomial-distribution method against convolution
+        # method.
+        p_list = [.9999] * 100 + [.99] * 50 + [.8] * 20
+        pmf1 = statistics.survival_pmf_via_conv(p_list)
+        pmf2 = statistics.survival_pmf_via_bd(p_list)
+        self.failUnlessListAlmostEqual(pmf1, pmf2)
+        self.failUnlessTrue(statistics.valid_pmf(pmf1))
+        self.should_assert("Should assert if p_i > 1", f, [1.1])
+        self.should_assert("Should assert if p_i < 0", f, [-.1])
+
+    def test_repair_count_pmf(self):
+        """Check repair_count_pmf against hand-computed probabilities."""
+        survival_pmf = statistics.binomial_distribution_pmf(5, .9)
+        repair_pmf = statistics.repair_count_pmf(survival_pmf, 3)
+        # repair_pmf[0] == sum(survival_pmf[0,1,2,5])
+        # repair_pmf[1] == survival_pmf[4]
+        # repair_pmf[2] = survival_pmf[3]
+        self.failUnlessListAlmostEqual(repair_pmf,
+                                       [0.00001 + 0.00045 + 0.0081 + 0.59049,
+                                        .32805,
+                                        .0729,
+                                        0, 0, 0])
+
+    def test_repair_cost(self):
+        """Check mean and eternal repair cost against recorded values."""
+        survival_pmf = statistics.binomial_distribution_pmf(5, .9)
+        bwcost = statistics.bandwidth_cost_function
+        cost = statistics.mean_repair_cost(bwcost, 1000,
+                                           survival_pmf, 3, ul_dl_ratio=1.0)
+        self.failUnlessAlmostEqual(cost, 558.90)
+        cost = statistics.mean_repair_cost(bwcost, 1000,
+                                           survival_pmf, 3, ul_dl_ratio=8.0)
+        self.failUnlessAlmostEqual(cost, 1664.55)
+
+        # I haven't manually checked the math beyond here -warner
+        cost = statistics.eternal_repair_cost(bwcost, 1000,
+                                              survival_pmf, 3,
+                                              discount_rate=0, ul_dl_ratio=1.0)
+        self.failUnlessAlmostEqual(cost, 65292.056074766246)
+        cost = statistics.eternal_repair_cost(bwcost, 1000,
+                                              survival_pmf, 3,
+                                              discount_rate=0.05,
+                                              ul_dl_ratio=1.0)
+        self.failUnlessAlmostEqual(cost, 9133.6097158191551)
+
+    def test_convolve(self):
+        """Check convolve() against a known result and algebraic identities."""
+        f = statistics.convolve
+        v1 = [ 1, 2, 3 ]
+        v2 = [ 4, 5, 6 ]
+        v3 = [ 7, 8 ]
+        v1v2result = [ 4, 13, 28, 27, 18 ]
+        # Convolution is commutative
+        r1 = f(v1, v2)
+        r2 = f(v2, v1)
+        self.failUnlessListEqual(r1, r2, "Convolution should be commutative")
+        self.failUnlessListEqual(r1, v1v2result, "Didn't match known result")
+        # Convolution is associative
+        r1 = f(f(v1, v2), v3)
+        r2 = f(v1, f(v2, v3))
+        self.failUnlessListEqual(r1, r2, "Convolution should be associative")
+        # Convolution is distributive
+        r1 = f(v3, [ a + b for a, b in zip(v1, v2) ])
+        tmp1 = f(v3, v1)
+        tmp2 = f(v3, v2)
+        r2 = [ a + b for a, b in zip(tmp1, tmp2) ]
+        self.failUnlessListEqual(r1, r2, "Convolution should be distributive")
+        # Convolution is scalar multiplication associative
+        tmp1 = f(v1, v2)
+        r1 = [ a * 4 for a in tmp1 ]
+        tmp2 = [ a * 4 for a in v1 ]
+        r2 = f(tmp2, v2)
+        self.failUnlessListEqual(r1, r2, "Convolution should be scalar multiplication associative")
+
+    def test_find_k(self):
+        """Check find_k() and that its k keeps pr_file_loss below the target."""
+        f = statistics.find_k
+        g = statistics.pr_file_loss
+        plist = [.9] * 10 + [.8] * 10 # N=20
+        t = .0001
+        k = f(plist, t)
+        self.failUnlessEqual(k, 10)
+        self.failUnless(g(plist, k) < t)
+
+    def test_pr_file_loss(self):
+        """Check pr_file_loss() against a recorded value."""
+        f = statistics.pr_file_loss
+        plist = [.5] * 10
+        self.failUnlessEqual(f(plist, 3), .0546875)
+
+    def test_pr_backup_file_loss(self):
+        """Check pr_backup_file_loss() against a recorded value."""
+        f = statistics.pr_backup_file_loss
+        plist = [.5] * 10
+        self.failUnlessEqual(f(plist, .5, 3), .02734375)
--- a/src/allmydata/test/test_util.py
+++ b/src/allmydata/test/test_util.py
@ -8,11 +8,9 @@ if PY2:
    from builtins import zip, str, range, object
 import binascii
 import six
-import hashlib
 import os, time, sys
 import yaml

-from six.moves import StringIO
 from twisted.trial import unittest
 from twisted.internet import defer, reactor
 from twisted.python.failure import Failure
@ -20,9 +18,8 @@ from twisted.python.failure import Failure
 from allmydata.util import idlib, mathutil
 from allmydata.util import fileutil
 from allmydata.util import limiter, pollmixin
-from allmydata.util import statistics, dictutil, yamlutil
+from allmydata.util import yamlutil
 from allmydata.util import log as tahoe_log
-from allmydata.util.spans import Spans, overlap, DataSpans
 from allmydata.util.fileutil import EncryptedTemporaryFile
 from allmydata.test.common_util import ReallyEqualMixin

@ -30,15 +27,6 @@ if six.PY3:
    long = int


-def sha256(data):
-    """
-    :param bytes data: data to hash
-
-    :returns: a hex-encoded SHA256 hash of the data
-    """
-    return binascii.hexlify(hashlib.sha256(data.encode('utf8')).digest()).decode("utf8")
-
-
 class IDLib(unittest.TestCase):
    def test_nodeid_b2a(self):
        self.failUnlessEqual(idlib.nodeid_b2a(b"\x00"*20), "a"*32)
@ -52,143 +40,6 @@ class Math(unittest.TestCase):
        f = mathutil.round_sigfigs
        self.failUnlessEqual(f(22.0/3, 4), 7.3330000000000002)

-class Statistics(unittest.TestCase):
-    def should_assert(self, msg, func, *args, **kwargs):
-        try:
-            func(*args, **kwargs)
-            self.fail(msg)
-        except AssertionError:
-            pass
-
-    def failUnlessListEqual(self, a, b, msg = None):
-        self.failUnlessEqual(len(a), len(b))
-        for i in range(len(a)):
-            self.failUnlessEqual(a[i], b[i], msg)
-
-    def failUnlessListAlmostEqual(self, a, b, places = 7, msg = None):
-        self.failUnlessEqual(len(a), len(b))
-        for i in range(len(a)):
-            self.failUnlessAlmostEqual(a[i], b[i], places, msg)
-
-    def test_binomial_coeff(self):
-        f = statistics.binomial_coeff
-        self.failUnlessEqual(f(20, 0), 1)
-        self.failUnlessEqual(f(20, 1), 20)
-        self.failUnlessEqual(f(20, 2), 190)
-        self.failUnlessEqual(f(20, 8), f(20, 12))
-        self.should_assert("Should assert if n < k", f, 2, 3)
-
-    def test_binomial_distribution_pmf(self):
-        f = statistics.binomial_distribution_pmf
-
-        pmf_comp = f(2, .1)
-        pmf_stat = [0.81, 0.18, 0.01]
-        self.failUnlessListAlmostEqual(pmf_comp, pmf_stat)
-
-        # Summing across a PMF should give the total probability 1
-        self.failUnlessAlmostEqual(sum(pmf_comp), 1)
-        self.should_assert("Should assert if not 0<=p<=1", f, 1, -1)
-        self.should_assert("Should assert if n < 1", f, 0, .1)
-
-        out = StringIO()
-        statistics.print_pmf(pmf_comp, out=out)
-        lines = out.getvalue().splitlines()
-        self.failUnlessEqual(lines[0], "i=0: 0.81")
-        self.failUnlessEqual(lines[1], "i=1: 0.18")
-        self.failUnlessEqual(lines[2], "i=2: 0.01")
-
-    def test_survival_pmf(self):
-        f = statistics.survival_pmf
-        # Cross-check binomial-distribution method against convolution
-        # method.
-        p_list = [.9999] * 100 + [.99] * 50 + [.8] * 20
-        pmf1 = statistics.survival_pmf_via_conv(p_list)
-        pmf2 = statistics.survival_pmf_via_bd(p_list)
-        self.failUnlessListAlmostEqual(pmf1, pmf2)
-        self.failUnlessTrue(statistics.valid_pmf(pmf1))
-        self.should_assert("Should assert if p_i > 1", f, [1.1]);
-        self.should_assert("Should assert if p_i < 0", f, [-.1]);
-
-    def test_repair_count_pmf(self):
-        survival_pmf = statistics.binomial_distribution_pmf(5, .9)
-        repair_pmf = statistics.repair_count_pmf(survival_pmf, 3)
-        # repair_pmf[0] == sum(survival_pmf[0,1,2,5])
-        # repair_pmf[1] == survival_pmf[4]
-        # repair_pmf[2] = survival_pmf[3]
-        self.failUnlessListAlmostEqual(repair_pmf,
-                                       [0.00001 + 0.00045 + 0.0081 + 0.59049,
-                                        .32805,
-                                        .0729,
-                                        0, 0, 0])
-
-    def test_repair_cost(self):
-        survival_pmf = statistics.binomial_distribution_pmf(5, .9)
-        bwcost = statistics.bandwidth_cost_function
-        cost = statistics.mean_repair_cost(bwcost, 1000,
-                                           survival_pmf, 3, ul_dl_ratio=1.0)
-        self.failUnlessAlmostEqual(cost, 558.90)
-        cost = statistics.mean_repair_cost(bwcost, 1000,
-                                           survival_pmf, 3, ul_dl_ratio=8.0)
-        self.failUnlessAlmostEqual(cost, 1664.55)
-
-        # I haven't manually checked the math beyond here -warner
-        cost = statistics.eternal_repair_cost(bwcost, 1000,
-                                              survival_pmf, 3,
-                                              discount_rate=0, ul_dl_ratio=1.0)
-        self.failUnlessAlmostEqual(cost, 65292.056074766246)
-        cost = statistics.eternal_repair_cost(bwcost, 1000,
-                                              survival_pmf, 3,
-                                              discount_rate=0.05,
-                                              ul_dl_ratio=1.0)
-        self.failUnlessAlmostEqual(cost, 9133.6097158191551)
-
-    def test_convolve(self):
-        f = statistics.convolve
-        v1 = [ 1, 2, 3 ]
-        v2 = [ 4, 5, 6 ]
-        v3 = [ 7, 8 ]
-        v1v2result = [ 4, 13, 28, 27, 18 ]
-        # Convolution is commutative
-        r1 = f(v1, v2)
-        r2 = f(v2, v1)
-        self.failUnlessListEqual(r1, r2, "Convolution should be commutative")
-        self.failUnlessListEqual(r1, v1v2result, "Didn't match known result")
-        # Convolution is associative
-        r1 = f(f(v1, v2), v3)
-        r2 = f(v1, f(v2, v3))
-        self.failUnlessListEqual(r1, r2, "Convolution should be associative")
-        # Convolution is distributive
-        r1 = f(v3, [ a + b for a, b in zip(v1, v2) ])
-        tmp1 = f(v3, v1)
-        tmp2 = f(v3, v2)
-        r2 = [ a + b for a, b in zip(tmp1, tmp2) ]
-        self.failUnlessListEqual(r1, r2, "Convolution should be distributive")
-        # Convolution is scalar multiplication associative
-        tmp1 = f(v1, v2)
-        r1 = [ a * 4 for a in tmp1 ]
-        tmp2 = [ a * 4 for a in v1 ]
-        r2 = f(tmp2, v2)
-        self.failUnlessListEqual(r1, r2, "Convolution should be scalar multiplication associative")
-
-    def test_find_k(self):
-        f = statistics.find_k
-        g = statistics.pr_file_loss
-        plist = [.9] * 10 + [.8] * 10 # N=20
-        t = .0001
-        k = f(plist, t)
-        self.failUnlessEqual(k, 10)
-        self.failUnless(g(plist, k) < t)
-
-    def test_pr_file_loss(self):
-        f = statistics.pr_file_loss
-        plist = [.5] * 10
-        self.failUnlessEqual(f(plist, 3), .0546875)
-
-    def test_pr_backup_file_loss(self):
-        f = statistics.pr_backup_file_loss
-        plist = [.5] * 10
-        self.failUnlessEqual(f(plist, .5, 3), .02734375)
-

 class FileUtil(ReallyEqualMixin, unittest.TestCase):
    def mkdir(self, basedir, path, mode=0o777):
@ -685,78 +536,6 @@ class EqButNotIs(object):
    def __eq__(self, other):
        return self.x == other

-class DictUtil(unittest.TestCase):
-    def test_dict_of_sets(self):
-        ds = dictutil.DictOfSets()
-        ds.add(1, "a")
-        ds.add(2, "b")
-        ds.add(2, "b")
-        ds.add(2, "c")
-        self.failUnlessEqual(ds[1], set(["a"]))
-        self.failUnlessEqual(ds[2], set(["b", "c"]))
-        ds.discard(3, "d") # should not raise an exception
-        ds.discard(2, "b")
-        self.failUnlessEqual(ds[2], set(["c"]))
-        ds.discard(2, "c")
-        self.failIf(2 in ds)
-
-        ds.add(3, "f")
-        ds2 = dictutil.DictOfSets()
-        ds2.add(3, "f")
-        ds2.add(3, "g")
-        ds2.add(4, "h")
-        ds.update(ds2)
-        self.failUnlessEqual(ds[1], set(["a"]))
-        self.failUnlessEqual(ds[3], set(["f", "g"]))
-        self.failUnlessEqual(ds[4], set(["h"]))
-
-    def test_auxdict(self):
-        d = dictutil.AuxValueDict()
-        # we put the serialized form in the auxdata
-        d.set_with_aux("key", ("filecap", "metadata"), "serialized")
-
-        self.failUnlessEqual(list(d.keys()), ["key"])
-        self.failUnlessEqual(d["key"], ("filecap", "metadata"))
-        self.failUnlessEqual(d.get_aux("key"), "serialized")
-        def _get_missing(key):
-            return d[key]
-        self.failUnlessRaises(KeyError, _get_missing, "nonkey")
-        self.failUnlessEqual(d.get("nonkey"), None)
-        self.failUnlessEqual(d.get("nonkey", "nonvalue"), "nonvalue")
-        self.failUnlessEqual(d.get_aux("nonkey"), None)
-        self.failUnlessEqual(d.get_aux("nonkey", "nonvalue"), "nonvalue")
-
-        d["key"] = ("filecap2", "metadata2")
-        self.failUnlessEqual(d["key"], ("filecap2", "metadata2"))
-        self.failUnlessEqual(d.get_aux("key"), None)
-
-        d.set_with_aux("key2", "value2", "aux2")
-        self.failUnlessEqual(sorted(d.keys()), ["key", "key2"])
-        del d["key2"]
-        self.failUnlessEqual(list(d.keys()), ["key"])
-        self.failIf("key2" in d)
-        self.failUnlessRaises(KeyError, _get_missing, "key2")
-        self.failUnlessEqual(d.get("key2"), None)
-        self.failUnlessEqual(d.get_aux("key2"), None)
-        d["key2"] = "newvalue2"
-        self.failUnlessEqual(d.get("key2"), "newvalue2")
-        self.failUnlessEqual(d.get_aux("key2"), None)
-
-        d = dictutil.AuxValueDict({1:2,3:4})
-        self.failUnlessEqual(sorted(d.keys()), [1,3])
-        self.failUnlessEqual(d[1], 2)
-        self.failUnlessEqual(d.get_aux(1), None)
-
-        d = dictutil.AuxValueDict([ (1,2), (3,4) ])
-        self.failUnlessEqual(sorted(d.keys()), [1,3])
-        self.failUnlessEqual(d[1], 2)
-        self.failUnlessEqual(d.get_aux(1), None)
-
-        d = dictutil.AuxValueDict(one=1, two=2)
-        self.failUnlessEqual(sorted(d.keys()), ["one","two"])
-        self.failUnlessEqual(d["one"], 1)
-        self.failUnlessEqual(d.get_aux("one"), None)
-

 class SampleError(Exception):
    pass
@ -772,589 +551,6 @@ class Log(unittest.TestCase):
        self.flushLoggedErrors(SampleError)


-class SimpleSpans(object):
-    # this is a simple+inefficient form of util.spans.Spans . We compare the
-    # behavior of this reference model against the real (efficient) form.
-
-    def __init__(self, _span_or_start=None, length=None):
-        self._have = set()
-        if length is not None:
-            for i in range(_span_or_start, _span_or_start+length):
-                self._have.add(i)
-        elif _span_or_start:
-            for (start,length) in _span_or_start:
-                self.add(start, length)
-
-    def add(self, start, length):
-        for i in range(start, start+length):
-            self._have.add(i)
-        return self
-
-    def remove(self, start, length):
-        for i in range(start, start+length):
-            self._have.discard(i)
-        return self
-
-    def each(self):
-        return sorted(self._have)
-
-    def __iter__(self):
-        items = sorted(self._have)
-        prevstart = None
-        prevend = None
-        for i in items:
-            if prevstart is None:
-                prevstart = prevend = i
-                continue
-            if i == prevend+1:
-                prevend = i
-                continue
-            yield (prevstart, prevend-prevstart+1)
-            prevstart = prevend = i
-        if prevstart is not None:
-            yield (prevstart, prevend-prevstart+1)
-
-    def __bool__(self): # this gets us bool()
-        return bool(self.len())
-
-    def len(self):
-        return len(self._have)
-
-    def __add__(self, other):
-        s = self.__class__(self)
-        for (start, length) in other:
-            s.add(start, length)
-        return s
-
-    def __sub__(self, other):
-        s = self.__class__(self)
-        for (start, length) in other:
-            s.remove(start, length)
-        return s
-
-    def __iadd__(self, other):
-        for (start, length) in other:
-            self.add(start, length)
-        return self
-
-    def __isub__(self, other):
-        for (start, length) in other:
-            self.remove(start, length)
-        return self
-
-    def __and__(self, other):
-        s = self.__class__()
-        for i in other.each():
-            if i in self._have:
-                s.add(i, 1)
-        return s
-
-    def __contains__(self, start_and_length):
-        (start, length) = start_and_length
-        for i in range(start, start+length):
-            if i not in self._have:
-                return False
-        return True
-
-class ByteSpans(unittest.TestCase):
-    def test_basic(self):
-        s = Spans()
-        self.failUnlessEqual(list(s), [])
-        self.failIf(s)
-        self.failIf((0,1) in s)
-        self.failUnlessEqual(s.len(), 0)
-
-        s1 = Spans(3, 4) # 3,4,5,6
-        self._check1(s1)
-
-        s1 = Spans(int(3), int(4)) # 3,4,5,6
-        self._check1(s1)
-
-        s2 = Spans(s1)
-        self._check1(s2)
-
-        s2.add(10,2) # 10,11
-        self._check1(s1)
-        self.failUnless((10,1) in s2)
-        self.failIf((10,1) in s1)
-        self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11])
-        self.failUnlessEqual(s2.len(), 6)
-
-        s2.add(15,2).add(20,2)
-        self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11,15,16,20,21])
-        self.failUnlessEqual(s2.len(), 10)
-
-        s2.remove(4,3).remove(15,1)
-        self.failUnlessEqual(list(s2.each()), [3,10,11,16,20,21])
-        self.failUnlessEqual(s2.len(), 6)
-
-        s1 = SimpleSpans(3, 4) # 3 4 5 6
-        s2 = SimpleSpans(5, 4) # 5 6 7 8
-        i = s1 & s2
-        self.failUnlessEqual(list(i.each()), [5, 6])
-
-    def _check1(self, s):
-        self.failUnlessEqual(list(s), [(3,4)])
-        self.failUnless(s)
-        self.failUnlessEqual(s.len(), 4)
-        self.failIf((0,1) in s)
-        self.failUnless((3,4) in s)
-        self.failUnless((3,1) in s)
-        self.failUnless((5,2) in s)
-        self.failUnless((6,1) in s)
-        self.failIf((6,2) in s)
-        self.failIf((7,1) in s)
-        self.failUnlessEqual(list(s.each()), [3,4,5,6])
-
-    def test_large(self):
-        s = Spans(4, 2**65) # don't do this with a SimpleSpans
-        self.failUnlessEqual(list(s), [(4, 2**65)])
-        self.failUnless(s)
-        self.failUnlessEqual(s.len(), 2**65)
-        self.failIf((0,1) in s)
-        self.failUnless((4,2) in s)
-        self.failUnless((2**65,2) in s)
-
-    def test_math(self):
-        s1 = Spans(0, 10) # 0,1,2,3,4,5,6,7,8,9
-        s2 = Spans(5, 3) # 5,6,7
-        s3 = Spans(8, 4) # 8,9,10,11
-
-        s = s1 - s2
-        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
-        s = s1 - s3
-        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
-        s = s2 - s3
-        self.failUnlessEqual(list(s.each()), [5,6,7])
-        s = s1 & s2
-        self.failUnlessEqual(list(s.each()), [5,6,7])
-        s = s2 & s1
-        self.failUnlessEqual(list(s.each()), [5,6,7])
-        s = s1 & s3
-        self.failUnlessEqual(list(s.each()), [8,9])
-        s = s3 & s1
-        self.failUnlessEqual(list(s.each()), [8,9])
-        s = s2 & s3
-        self.failUnlessEqual(list(s.each()), [])
-        s = s3 & s2
-        self.failUnlessEqual(list(s.each()), [])
-        s = Spans() & s3
-        self.failUnlessEqual(list(s.each()), [])
-        s = s3 & Spans()
-        self.failUnlessEqual(list(s.each()), [])
-
-        s = s1 + s2
-        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
-        s = s1 + s3
-        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
-        s = s2 + s3
-        self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
-
-        s = Spans(s1)
-        s -= s2
-        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
-        s = Spans(s1)
-        s -= s3
-        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
-        s = Spans(s2)
-        s -= s3
-        self.failUnlessEqual(list(s.each()), [5,6,7])
-
-        s = Spans(s1)
-        s += s2
-        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
-        s = Spans(s1)
-        s += s3
-        self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
-        s = Spans(s2)
-        s += s3
-        self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
-
-    def test_random(self):
-        # attempt to increase coverage of corner cases by comparing behavior
-        # of a simple-but-slow model implementation against the
-        # complex-but-fast actual implementation, in a large number of random
-        # operations
-        S1 = SimpleSpans
-        S2 = Spans
-        s1 = S1(); s2 = S2()
-        seed = ""
-        def _create(subseed):
-            ns1 = S1(); ns2 = S2()
-            for i in range(10):
-                what = sha256(subseed+str(i))
-                start = int(what[2:4], 16)
-                length = max(1,int(what[5:6], 16))
-                ns1.add(start, length); ns2.add(start, length)
-            return ns1, ns2
-
-        #print
-        for i in range(1000):
-            what = sha256(seed+str(i))
-            op = what[0]
-            subop = what[1]
-            start = int(what[2:4], 16)
-            length = max(1,int(what[5:6], 16))
-            #print what
-            if op in "0":
-                if subop in "01234":
-                    s1 = S1(); s2 = S2()
-                elif subop in "5678":
-                    s1 = S1(start, length); s2 = S2(start, length)
-                else:
-                    s1 = S1(s1); s2 = S2(s2)
-                #print "s2 = %s" % s2.dump()
-            elif op in "123":
-                #print "s2.add(%d,%d)" % (start, length)
-                s1.add(start, length); s2.add(start, length)
-            elif op in "456":
-                #print "s2.remove(%d,%d)" % (start, length)
-                s1.remove(start, length); s2.remove(start, length)
-            elif op in "78":
-                ns1, ns2 = _create(what[7:11])
-                #print "s2 + %s" % ns2.dump()
-                s1 = s1 + ns1; s2 = s2 + ns2
-            elif op in "9a":
-                ns1, ns2 = _create(what[7:11])
-                #print "%s - %s" % (s2.dump(), ns2.dump())
-                s1 = s1 - ns1; s2 = s2 - ns2
-            elif op in "bc":
-                ns1, ns2 = _create(what[7:11])
-                #print "s2 += %s" % ns2.dump()
-                s1 += ns1; s2 += ns2
-            elif op in "de":
-                ns1, ns2 = _create(what[7:11])
-                #print "%s -= %s" % (s2.dump(), ns2.dump())
-                s1 -= ns1; s2 -= ns2
-            else:
-                ns1, ns2 = _create(what[7:11])
-                #print "%s &= %s" % (s2.dump(), ns2.dump())
-                s1 = s1 & ns1; s2 = s2 & ns2
-            #print "s2 now %s" % s2.dump()
-            self.failUnlessEqual(list(s1.each()), list(s2.each()))
-            self.failUnlessEqual(s1.len(), s2.len())
-            self.failUnlessEqual(bool(s1), bool(s2))
-            self.failUnlessEqual(list(s1), list(s2))
-            for j in range(10):
-                what = sha256(what[12:14]+str(j))
-                start = int(what[2:4], 16)
-                length = max(1, int(what[5:6], 16))
-                span = (start, length)
-                self.failUnlessEqual(bool(span in s1), bool(span in s2))
-
-
-    # s()
-    # s(start,length)
-    # s(s0)
-    # s.add(start,length) : returns s
-    # s.remove(start,length)
-    # s.each() -> list of byte offsets, mostly for testing
-    # list(s) -> list of (start,length) tuples, one per span
-    # (start,length) in s -> True if (start..start+length-1) are all members
-    #  NOT equivalent to x in list(s)
-    # s.len() -> number of bytes, for testing, bool(), and accounting/limiting
-    # bool(s)  (__nonzeron__)
-    # s = s1+s2, s1-s2, +=s1, -=s1
-
-    def test_overlap(self):
-        for a in range(20):
-            for b in range(10):
-                for c in range(20):
-                    for d in range(10):
-                        self._test_overlap(a,b,c,d)
-
-    def _test_overlap(self, a, b, c, d):
-        s1 = set(range(a,a+b))
-        s2 = set(range(c,c+d))
-        #print "---"
-        #self._show_overlap(s1, "1")
-        #self._show_overlap(s2, "2")
-        o = overlap(a,b,c,d)
-        expected = s1.intersection(s2)
-        if not expected:
-            self.failUnlessEqual(o, None)
-        else:
-            start,length = o
-            so = set(range(start,start+length))
-            #self._show(so, "o")
-            self.failUnlessEqual(so, expected)
-
-    def _show_overlap(self, s, c):
-        import sys
-        out = sys.stdout
-        if s:
-            for i in range(max(s)):
-                if i in s:
-                    out.write(c)
-                else:
-                    out.write(" ")
-        out.write("\n")
-
-def extend(s, start, length, fill):
-    if len(s) >= start+length:
-        return s
-    assert len(fill) == 1
-    return s + fill*(start+length-len(s))
-
-def replace(s, start, data):
-    assert len(s) >= start+len(data)
-    return s[:start] + data + s[start+len(data):]
-
-class SimpleDataSpans(object):
-    def __init__(self, other=None):
-        self.missing = "" # "1" where missing, "0" where found
-        self.data = ""
-        if other:
-            for (start, data) in other.get_chunks():
-                self.add(start, data)
-
-    def __bool__(self): # this gets us bool()
-        return bool(self.len())
-    def len(self):
-        return len(self.missing.replace("1", ""))
-    def _dump(self):
-        return [i for (i,c) in enumerate(self.missing) if c == "0"]
-    def _have(self, start, length):
-        m = self.missing[start:start+length]
-        if not m or len(m)<length or int(m):
-            return False
-        return True
-    def get_chunks(self):
-        for i in self._dump():
-            yield (i, self.data[i])
-    def get_spans(self):
-        return SimpleSpans([(start,len(data))
-                            for (start,data) in self.get_chunks()])
-    def get(self, start, length):
-        if self._have(start, length):
-            return self.data[start:start+length]
-        return None
-    def pop(self, start, length):
-        data = self.get(start, length)
-        if data:
-            self.remove(start, length)
-        return data
-    def remove(self, start, length):
-        self.missing = replace(extend(self.missing, start, length, "1"),
-                               start, "1"*length)
-    def add(self, start, data):
-        self.missing = replace(extend(self.missing, start, len(data), "1"),
-                               start, "0"*len(data))
-        self.data = replace(extend(self.data, start, len(data), " "),
-                            start, data)
-
-
-class StringSpans(unittest.TestCase):
-    def do_basic(self, klass):
-        ds = klass()
-        self.failUnlessEqual(ds.len(), 0)
-        self.failUnlessEqual(list(ds._dump()), [])
-        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 0)
-        s1 = ds.get_spans()
-        self.failUnlessEqual(ds.get(0, 4), None)
-        self.failUnlessEqual(ds.pop(0, 4), None)
-        ds.remove(0, 4)
-
-        ds.add(2, "four")
-        self.failUnlessEqual(ds.len(), 4)
-        self.failUnlessEqual(list(ds._dump()), [2,3,4,5])
-        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
-        s1 = ds.get_spans()
-        self.failUnless((2,2) in s1)
-        self.failUnlessEqual(ds.get(0, 4), None)
-        self.failUnlessEqual(ds.pop(0, 4), None)
-        self.failUnlessEqual(ds.get(4, 4), None)
-
-        ds2 = klass(ds)
-        self.failUnlessEqual(ds2.len(), 4)
-        self.failUnlessEqual(list(ds2._dump()), [2,3,4,5])
-        self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 4)
-        self.failUnlessEqual(ds2.get(0, 4), None)
-        self.failUnlessEqual(ds2.pop(0, 4), None)
-        self.failUnlessEqual(ds2.pop(2, 3), "fou")
-        self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 1)
-        self.failUnlessEqual(ds2.get(2, 3), None)
-        self.failUnlessEqual(ds2.get(5, 1), "r")
-        self.failUnlessEqual(ds.get(2, 3), "fou")
-        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
-
-        ds.add(0, "23")
-        self.failUnlessEqual(ds.len(), 6)
-        self.failUnlessEqual(list(ds._dump()), [0,1,2,3,4,5])
-        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 6)
-        self.failUnlessEqual(ds.get(0, 4), "23fo")
-        self.failUnlessEqual(ds.pop(0, 4), "23fo")
-        self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 2)
-        self.failUnlessEqual(ds.get(0, 4), None)
-        self.failUnlessEqual(ds.pop(0, 4), None)
-
-        ds = klass()
-        ds.add(2, "four")
-        ds.add(3, "ea")
-        self.failUnlessEqual(ds.get(2, 4), "fear")
-
-        ds = klass()
-        ds.add(int(2), "four")
-        ds.add(int(3), "ea")
-        self.failUnlessEqual(ds.get(int(2), int(4)), "fear")
-
-
-    def do_scan(self, klass):
-        # do a test with gaps and spans of size 1 and 2
-        #  left=(1,11) * right=(1,11) * gapsize=(1,2)
-        # 111, 112, 121, 122, 211, 212, 221, 222
-        #    211
-        #      121
-        #         112
-        #            212
-        #               222
-        #                   221
-        #                      111
-        #                        122
-        #  11 1  1 11 11  11  1 1  111
-        # 0123456789012345678901234567
-        # abcdefghijklmnopqrstuvwxyz-=
-        pieces = [(1, "bc"),
-                  (4, "e"),
-                  (7, "h"),
-                  (9, "jk"),
-                  (12, "mn"),
-                  (16, "qr"),
-                  (20, "u"),
-                  (22, "w"),
-                  (25, "z-="),
-                  ]
-        p_elements = set([1,2,4,7,9,10,12,13,16,17,20,22,25,26,27])
-        S = "abcdefghijklmnopqrstuvwxyz-="
-        # TODO: when adding data, add capital letters, to make sure we aren't
-        # just leaving the old data in place
-        l = len(S)
-        def base():
-            ds = klass()
-            for start, data in pieces:
-                ds.add(start, data)
-            return ds
-        def dump(s):
-            p = set(s._dump())
-            d = "".join([((i not in p) and " " or S[i]) for i in range(l)])
-            assert len(d) == l
-            return d
-        DEBUG = False
-        for start in range(0, l):
-            for end in range(start+1, l):
-                # add [start-end) to the baseline
-                which = "%d-%d" % (start, end-1)
-                p_added = set(range(start, end))
-                b = base()
-                if DEBUG:
-                    print()
-                    print(dump(b), which)
-                    add = klass(); add.add(start, S[start:end])
-                    print(dump(add))
-                b.add(start, S[start:end])
-                if DEBUG:
-                    print(dump(b))
-                # check that the new span is there
-                d = b.get(start, end-start)
-                self.failUnlessEqual(d, S[start:end], which)
-                # check that all the original pieces are still there
-                for t_start, t_data in pieces:
-                    t_len = len(t_data)
-                    self.failUnlessEqual(b.get(t_start, t_len),
-                                         S[t_start:t_start+t_len],
-                                         "%s %d+%d" % (which, t_start, t_len))
-                # check that a lot of subspans are mostly correct
-                for t_start in range(l):
-                    for t_len in range(1,4):
-                        d = b.get(t_start, t_len)
-                        if d is not None:
-                            which2 = "%s+(%d-%d)" % (which, t_start,
-                                                     t_start+t_len-1)
-                            self.failUnlessEqual(d, S[t_start:t_start+t_len],
-                                                 which2)
-                        # check that removing a subspan gives the right value
-                        b2 = klass(b)
-                        b2.remove(t_start, t_len)
-                        removed = set(range(t_start, t_start+t_len))
-                        for i in range(l):
-                            exp = (((i in p_elements) or (i in p_added))
-                                   and (i not in removed))
-                            which2 = "%s-(%d-%d)" % (which, t_start,
-                                                     t_start+t_len-1)
-                            self.failUnlessEqual(bool(b2.get(i, 1)), exp,
-                                                 which2+" %d" % i)
-
-    def test_test(self):
-        self.do_basic(SimpleDataSpans)
-        self.do_scan(SimpleDataSpans)
-
-    def test_basic(self):
-        self.do_basic(DataSpans)
-        self.do_scan(DataSpans)
-
-    def test_random(self):
-        # attempt to increase coverage of corner cases by comparing behavior
-        # of a simple-but-slow model implementation against the
-        # complex-but-fast actual implementation, in a large number of random
-        # operations
-        S1 = SimpleDataSpans
-        S2 = DataSpans
-        s1 = S1(); s2 = S2()
-        seed = ""
-        def _randstr(length, seed):
-            created = 0
-            pieces = []
-            while created < length:
-                piece = sha256(seed + str(created))
-                pieces.append(piece)
-                created += len(piece)
-            return "".join(pieces)[:length]
-        def _create(subseed):
-            ns1 = S1(); ns2 = S2()
-            for i in range(10):
-                what = sha256(subseed+str(i))
-                start = int(what[2:4], 16)
-                length = max(1,int(what[5:6], 16))
-                ns1.add(start, _randstr(length, what[7:9]));
-                ns2.add(start, _randstr(length, what[7:9]))
-            return ns1, ns2
-
-        #print
-        for i in range(1000):
-            what = sha256(seed+str(i))
-            op = what[0]
-            subop = what[1]
-            start = int(what[2:4], 16)
-            length = max(1,int(what[5:6], 16))
-            #print what
-            if op in "0":
-                if subop in "0123456":
-                    s1 = S1(); s2 = S2()
-                else:
-                    s1, s2 = _create(what[7:11])
-                #print "s2 = %s" % list(s2._dump())
-            elif op in "123456":
-                #print "s2.add(%d,%d)" % (start, length)
-                s1.add(start, _randstr(length, what[7:9]));
-                s2.add(start, _randstr(length, what[7:9]))
-            elif op in "789abc":
-                #print "s2.remove(%d,%d)" % (start, length)
-                s1.remove(start, length); s2.remove(start, length)
-            else:
-                #print "s2.pop(%d,%d)" % (start, length)
-                d1 = s1.pop(start, length); d2 = s2.pop(start, length)
-                self.failUnlessEqual(d1, d2)
-            #print "s1 now %s" % list(s1._dump())
-            #print "s2 now %s" % list(s2._dump())
-            self.failUnlessEqual(s1.len(), s2.len())
-            self.failUnlessEqual(list(s1._dump()), list(s2._dump()))
-            for j in range(100):
-                what = sha256(what[12:14]+str(j))
-                start = int(what[2:4], 16)
-                length = max(1, int(what[5:6], 16))
-                d1 = s1.get(start, length); d2 = s2.get(start, length)
-                self.failUnlessEqual(d1, d2, "%d+%d" % (start, length))
-
 class YAML(unittest.TestCase):
    def test_convert(self):
        data = yaml.safe_dump(["str", u"unicode", u"\u1234nicode"])
--- a/src/allmydata/test/test_version.py
+++ b/src/allmydata/test/test_version.py
@ -1,3 +1,16 @@
+"""
+Tests for allmydata.util.verlib and allmydata.version_checks.
+
+Ported to Python 3.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401

 import sys
 import pkg_resources
@ -79,7 +92,7 @@ class CheckRequirement(unittest.TestCase):

        res = cross_check({}, [("foo", ("unparseable", "", None))])
        self.failUnlessEqual(len(res), 1)
-        self.failUnlessIn("version 'unparseable'", res[0])
+        self.assertTrue(("version 'unparseable'" in res[0]) or ("version u'unparseable'" in res[0]))
        self.failUnlessIn("was not found by pkg_resources", res[0])

        res = cross_check({"distribute": ("1.0", "/somewhere")}, [("setuptools", ("2.0", "/somewhere", "distribute"))])
@ -120,7 +133,7 @@ class CheckRequirement(unittest.TestCase):

        res = cross_check({"foo": ("1.0", "/somewhere")}, [("foo", ("2.0", "/somewhere_different", None))])
        self.failUnlessEqual(len(res), 1)
-        self.failUnlessIn("but version '2.0'", res[0])
+        self.assertTrue(("but version '2.0'" in res[0]) or ("but version u'2.0'" in res[0]))

    def test_extract_openssl_version(self):
        self.failUnlessEqual(extract_openssl_version(MockSSL("")),
--- a/src/allmydata/util/_python3.py
+++ b/src/allmydata/util/_python3.py
@ -22,8 +22,10 @@ PORTED_MODULES = [
    "allmydata.util.base62",
    "allmydata.util.deferredutil",
    "allmydata.util.fileutil",
+    "allmydata.util.dictutil",
    "allmydata.util.hashutil",
    "allmydata.util.humanreadable",
+    "allmydata.util.iputil",
    "allmydata.util.mathutil",
    "allmydata.util.namespace",
    "allmydata.util.netstring",
@ -31,6 +33,8 @@ PORTED_MODULES = [
    "allmydata.util.pipeline",
    "allmydata.util.pollmixin",
    "allmydata.util._python3",
+    "allmydata.util.spans",
+    "allmydata.util.statistics",
    "allmydata.util.time_format",
    "allmydata.test.common_py3",
 ]
@ -40,18 +44,22 @@ PORTED_TEST_MODULES = [
    "allmydata.test.test_base32",
    "allmydata.test.test_base62",
    "allmydata.test.test_deferredutil",
+    "allmydata.test.test_dictutil",
    "allmydata.test.test_hashtree",
    "allmydata.test.test_hashutil",
    "allmydata.test.test_humanreadable",
+    "allmydata.test.test_iputil",
    "allmydata.test.test_netstring",
    "allmydata.test.test_observer",
    "allmydata.test.test_pipeline",
    "allmydata.test.test_python3",
+    "allmydata.test.test_spans",
+    "allmydata.test.test_statistics",
    "allmydata.test.test_time_format",
    "allmydata.test.test_util",
+    "allmydata.test.test_version",
 ]

-
 if __name__ == '__main__':
    from subprocess import check_call
    check_call(["trial"] + PORTED_TEST_MODULES)
--- a/src/allmydata/util/dictutil.py
+++ b/src/allmydata/util/dictutil.py
@ -1,6 +1,20 @@
 """
 Tools to mess with dicts.
+
+Ported to Python 3.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    # IMPORTANT: We deliberately don't import dict. The issue is that we're
+    # subclassing dict, so we'd end up exposing Python 3 dict APIs to lots of
+    # code that doesn't support it.
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, int, list, object, range, str, max, min  # noqa: F401
+

 class DictOfSets(dict):
    def add(self, key, value):
--- a/src/allmydata/util/encodingutil.py
+++ b/src/allmydata/util/encodingutil.py
@ -11,6 +11,8 @@ from twisted.python.filepath import FilePath
 from allmydata.util import log
 from allmydata.util.fileutil import abspath_expanduser_unicode

+NoneType = type(None)
+

 def canonical_encoding(encoding):
    if encoding is None:
--- a/src/allmydata/util/iputil.py
+++ b/src/allmydata/util/iputil.py
@ -1,4 +1,18 @@
-# from the Python Standard Library
+"""
+Utilities for getting IP addresses.
+
+Ported to Python 3.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2, native_str
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401
+
 import os, re, socket, subprocess, errno
 from sys import platform

@ -88,13 +102,18 @@ except ImportError:
    increase_rlimits = _increase_rlimits

 def get_local_addresses_sync():
-    return _synchronously_find_addresses_via_config()
+    """
+    Return a list of IPv4 addresses (as dotted-quad native strings) that are
+    currently configured on this host, sorted in descending order of how likely
+    we think they are to work.
+    """
+    return [native_str(a) for a in _synchronously_find_addresses_via_config()]

 def get_local_addresses_async(target="198.41.0.4"): # A.ROOT-SERVERS.NET
    """
    Return a Deferred that fires with a list of IPv4 addresses (as dotted-quad
-    strings) that are currently configured on this host, sorted in descending
-    order of how likely we think they are to work.
+    native strings) that are currently configured on this host, sorted in
+    descending order of how likely we think they are to work.

    @param target: we want to learn an IP address they could try using to
        connect to us; The default value is fine, but it might help if you
@ -117,13 +136,13 @@ def get_local_addresses_async(target="198.41.0.4"): # A.ROOT-SERVERS.NET
                addresses.append(addr)
        return addresses
    d.addCallback(_collect)
-
+    d.addCallback(lambda addresses: [native_str(s) for s in addresses])
    return d

 def get_local_ip_for(target):
    """Find out what our IP address is for use by a given target.

-    @return: the IP address as a dotted-quad string which could be used by
+    @return: the IP address as a dotted-quad native string which could be used
              to connect to us. It might work for them, it might not. If
              there is no suitable address (perhaps we don't currently have an
              externally-visible interface), this will return None.
@ -162,7 +181,7 @@ def get_local_ip_for(target):
    except (socket.error, CannotListenError):
        # no route to that host
        localip = None
-    return localip
+    return native_str(localip)


 # Wow, I'm really amazed at home much mileage we've gotten out of calling
@ -171,11 +190,11 @@ def get_local_ip_for(target):
 # ... thus wrote Greg Smith in time immemorial...
 # Also, the Win32 APIs for this are really klunky and error-prone. --Daira

-_win32_re = re.compile(r'^\s*\d+\.\d+\.\d+\.\d+\s.+\s(?P<address>\d+\.\d+\.\d+\.\d+)\s+(?P<metric>\d+)\s*$', flags=re.M|re.I|re.S)
+_win32_re = re.compile(br'^\s*\d+\.\d+\.\d+\.\d+\s.+\s(?P<address>\d+\.\d+\.\d+\.\d+)\s+(?P<metric>\d+)\s*$', flags=re.M|re.I|re.S)
 _win32_commands = (('route.exe', ('print',), _win32_re),)

 # These work in most Unices.
-_addr_re = re.compile(r'^\s*inet [a-zA-Z]*:?(?P<address>\d+\.\d+\.\d+\.\d+)[\s/].+$', flags=re.M|re.I|re.S)
+_addr_re = re.compile(br'^\s*inet [a-zA-Z]*:?(?P<address>\d+\.\d+\.\d+\.\d+)[\s/].+$', flags=re.M|re.I|re.S)
 _unix_commands = (('/bin/ip', ('addr',), _addr_re),
                  ('/sbin/ip', ('addr',), _addr_re),
                  ('/sbin/ifconfig', ('-a',), _addr_re),
@ -209,10 +228,13 @@ def _synchronously_find_addresses_via_config():
        else:
            exes_to_try = which(pathtotool)

+        subprocess_error = getattr(
+            subprocess, "SubprocessError", subprocess.CalledProcessError
+        )
        for exe in exes_to_try:
            try:
                addresses = _query(exe, args, regex)
-            except Exception:
+            except (IOError, OSError, ValueError, subprocess_error):
                addresses = []
            if addresses:
                return addresses
@ -222,9 +244,9 @@ def _synchronously_find_addresses_via_config():
 def _query(path, args, regex):
    if not os.path.isfile(path):
        return []
-    env = {'LANG': 'en_US.UTF-8'}
+    env = {native_str('LANG'): native_str('en_US.UTF-8')}
    TRIES = 5
-    for trial in xrange(TRIES):
+    for trial in range(TRIES):
        try:
            p = subprocess.Popen([path] + list(args), stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
            (output, err) = p.communicate()
@ -235,13 +257,13 @@ def _query(path, args, regex):
            raise

    addresses = []
-    outputsplit = output.split('\n')
+    outputsplit = output.split(b'\n')
    for outline in outputsplit:
        m = regex.match(outline)
        if m:
            addr = m.group('address')
            if addr not in addresses:
-                addresses.append(addr)
+                addresses.append(addr.decode("utf-8"))

    return addresses

@ -304,7 +326,7 @@ def _foolscapEndpointForPortNumber(portnum):
            # approach is error prone for the reasons described on
            # https://tahoe-lafs.org/trac/tahoe-lafs/ticket/2787
            portnum = allocate_tcp_port()
-    return (portnum, "tcp:%d" % (portnum,))
+    return (portnum, native_str("tcp:%d" % (portnum,)))


@implementer(IStreamServerEndpoint)
@ -353,7 +375,7 @@ def listenOnUnused(tub, portnum=None):
    """
    portnum, endpoint = _foolscapEndpointForPortNumber(portnum)
    tub.listenOn(endpoint)
-    tub.setLocation("localhost:%d" % (portnum,))
+    tub.setLocation(native_str("localhost:%d" % (portnum,)))
    return portnum


@ -362,4 +384,5 @@ __all__ = ["allocate_tcp_port",
           "get_local_addresses_sync",
           "get_local_addresses_async",
           "get_local_ip_for",
+           "listenOnUnused",
           ]
--- a/src/allmydata/util/spans.py
+++ b/src/allmydata/util/spans.py
@ -1,4 +1,11 @@
 from __future__ import print_function
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401

 from future.utils import PY2
 if PY2:
@ -162,6 +169,8 @@ class Spans(object):
    def __bool__(self): # this gets us bool()
        return bool(self.len())

+    #__nonzero__ = __bool__  # Python 2 backwards compatibility
+
    def len(self):
        # guess what! python doesn't allow __len__ to return a long, only an
        # int. So we stop using len(spans), use spans.len() instead.
--- a/src/allmydata/util/statistics.py
+++ b/src/allmydata/util/statistics.py
@ -1,3 +1,8 @@
+"""
+Statistical utilities.
+
+Ported to Python 3.
+"""
 # Copyright (c) 2009 Shawn Willden
 # mailto:shawn@willden.org
 # I hereby license all patches I have contributed or will contribute to the
@ -5,7 +10,18 @@
 # either the GNU General Public License, version 2 or later, or under the
 # Transitive Grace Period Public License, version 1 or later.

-from __future__ import division, print_function
+
+from __future__ import unicode_literals
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401
+
+from functools import reduce
+
 from allmydata.util.mathutil import round_sigfigs
 import math
 from functools import reduce
@ -79,7 +95,7 @@ def survival_pmf_via_bd(p_list):
    """
    pmf_list = [ binomial_distribution_pmf(p_list.count(p), p)
                 for p in set(p_list) ]
-    return reduce(convolve, pmf_list)
+    return list(reduce(convolve, pmf_list))

 def survival_pmf_via_conv(p_list):
    """
@ -90,7 +106,7 @@ def survival_pmf_via_conv(p_list):
    intended for internal use and testing only.
    """
    pmf_list = [ [1 - p, p] for p in p_list ];
-    return reduce(convolve, pmf_list)
+    return list(reduce(convolve, pmf_list))

 def print_pmf(pmf, n=4, out=sys.stdout):
    """
--- a/src/allmydata/util/verlib.py
+++ b/src/allmydata/util/verlib.py
@ -1,11 +1,21 @@
 """
 "Rational" version definition and parsing for DistutilsVersionFight
 discussion at PyCon 2009.
-"""

+Ported to Python 3.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401

 import re

+
 class IrrationalVersionError(Exception):
    """This is an irrational version."""
    pass
--- a/src/allmydata/version_checks.py
+++ b/src/allmydata/version_checks.py
@ -1,7 +1,17 @@
 """
 Produce reports about the versions of Python software in use by Tahoe-LAFS
 for debugging and auditing purposes.
+
+Ported to Python 3.
 """
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from future.utils import PY2
+if PY2:
+    from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, int, list, object, range, str, max, min  # noqa: F401

 __all__ = [
    "PackagingError",
@ -10,10 +20,12 @@ __all__ = [
    "normalized_version",
 ]

-import os, platform, re, subprocess, sys, traceback, pkg_resources
+import os, platform, re, sys, traceback, pkg_resources

 import six

+import distro
+
 from . import (
    __appname__,
    full_version,
@ -80,7 +92,7 @@ def normalized_version(verstr, what=None):
        return verlib.NormalizedVersion(suggested)
    except verlib.IrrationalVersionError:
        raise
-    except StandardError:
+    except Exception:
        cls, value, trace = sys.exc_info()
        new_exc = PackagingError("could not parse %s due to %s: %s"
                                 % (what or repr(verstr), cls.__name__, value))
@ -109,7 +121,7 @@ def _get_error_string(errors, debug=False):
 def _cross_check(pkg_resources_vers_and_locs, imported_vers_and_locs_list):
    """This function returns a list of errors due to any failed cross-checks."""

-    from _auto_deps import not_import_versionable
+    from ._auto_deps import not_import_versionable

    errors = []
    not_pkg_resourceable = ['python', 'platform', __appname__.lower(), 'openssl']
@ -201,83 +213,6 @@ def _extract_openssl_version(ssl_module):

    return (version, None, comment if comment else None)

-def _get_linux_distro():
-    """ Tries to determine the name of the Linux OS distribution name.
-
-    First, try to parse a file named "/etc/lsb-release".  If it exists, and
-    contains the "DISTRIB_ID=" line and the "DISTRIB_RELEASE=" line, then return
-    the strings parsed from that file.
-
-    If that doesn't work, then invoke platform.dist().
-
-    If that doesn't work, then try to execute "lsb_release", as standardized in
-    2001:
-
-    http://refspecs.freestandards.org/LSB_1.0.0/gLSB/lsbrelease.html
-
-    The current version of the standard is here:
-
-    http://refspecs.freestandards.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/lsbrelease.html
-
-    that lsb_release emitted, as strings.
-
-    Returns a tuple (distname,version). Distname is what LSB calls a
-    "distributor id", e.g. "Ubuntu".  Version is what LSB calls a "release",
-    e.g. "8.04".
-
-    A version of this has been submitted to python as a patch for the standard
-    library module "platform":
-
-    http://bugs.python.org/issue3937
-    """
-    global _distname,_version
-    if _distname and _version:
-        return (_distname, _version)
-
-    try:
-        with open("/etc/lsb-release", "rU") as etclsbrel:
-            for line in etclsbrel:
-                m = _distributor_id_file_re.search(line)
-                if m:
-                    _distname = m.group(1).strip()
-                    if _distname and _version:
-                        return (_distname, _version)
-                m = _release_file_re.search(line)
-                if m:
-                    _version = m.group(1).strip()
-                    if _distname and _version:
-                        return (_distname, _version)
-    except EnvironmentError:
-        pass
-
-    (_distname, _version) = platform.dist()[:2]
-    if _distname and _version:
-        return (_distname, _version)
-
-    if os.path.isfile("/usr/bin/lsb_release") or os.path.isfile("/bin/lsb_release"):
-        try:
-            p = subprocess.Popen(["lsb_release", "--all"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-            rc = p.wait()
-            if rc == 0:
-                for line in p.stdout.readlines():
-                    m = _distributor_id_cmdline_re.search(line)
-                    if m:
-                        _distname = m.group(1).strip()
-                        if _distname and _version:
-                            return (_distname, _version)
-
-                    m = _release_cmdline_re.search(p.stdout.read())
-                    if m:
-                        _version = m.group(1).strip()
-                        if _distname and _version:
-                            return (_distname, _version)
-        except EnvironmentError:
-            pass
-
-    if os.path.exists("/etc/arch-release"):
-        return ("Arch_Linux", "")
-
-    return (_distname,_version)

 def _get_platform():
    # Our version of platform.platform(), telling us both less and more than the
@ -288,7 +223,7 @@ def _get_platform():
    if "linux" in platform.system().lower():
        return (
            platform.system() + "-" +
-            "_".join(_get_linux_distro()) + "-" +
+            "_".join(distro.linux_distribution()[:2]) + "-" +
            platform.machine() + "-" +
            "_".join([x for x in platform.architecture() if x])
        )
@ -321,7 +256,7 @@ def _get_package_versions_and_locations():
        for modulename in warning_imports:
            try:
                __import__(modulename)
-            except ImportError:
+            except (ImportError, SyntaxError):
                pass
    finally:
        # Leave suppressions for UserWarnings and global_deprecation_messages active.
@ -355,7 +290,7 @@ def _get_package_versions_and_locations():
            try:
                __import__(modulename)
                module = sys.modules[modulename]
-            except ImportError:
+            except (ImportError, SyntaxError):
                etype, emsg, etrace = sys.exc_info()
                trace_info = (etype, str(emsg), ([None] + traceback.extract_tb(etrace))[-1])
                packages.append( (pkgname, (None, None, trace_info)) )
@ -386,7 +321,7 @@ def _get_package_versions_and_locations():
        imported_packages = set([p.lower() for (p, _) in packages])
        extra_packages = []

-        for pr_name, (pr_ver, pr_loc) in pkg_resources_vers_and_locs.iteritems():
+        for pr_name, (pr_ver, pr_loc) in pkg_resources_vers_and_locs.items():
            if pr_name not in imported_packages and pr_name not in ignorable:
                extra_packages.append( (pr_name, (pr_ver, pr_loc, "according to pkg_resources")) )