mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-01-17 10:20:13 +00:00
Add a byte-spans utility class, like perl's Set::IntSpan for .newsrc files.
Also a data-spans class, which records a byte (instead of a bit) for each index.
This commit is contained in:
parent
cd360c847c
commit
cbcb728e7e
@ -7,12 +7,14 @@ from twisted.trial import unittest
|
||||
from twisted.internet import defer, reactor
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.python import log
|
||||
from hashlib import md5
|
||||
|
||||
from allmydata.util import base32, idlib, humanreadable, mathutil, hashutil
|
||||
from allmydata.util import assertutil, fileutil, deferredutil, abbreviate
|
||||
from allmydata.util import limiter, time_format, pollmixin, cachedir
|
||||
from allmydata.util import statistics, dictutil, pipeline
|
||||
from allmydata.util import log as tahoe_log
|
||||
from allmydata.util.spans import Spans, overlap, DataSpans
|
||||
|
||||
class Base32(unittest.TestCase):
|
||||
def test_b2a_matches_Pythons(self):
|
||||
@ -1568,3 +1570,566 @@ class Log(unittest.TestCase):
|
||||
tahoe_log.err(format="intentional sample error",
|
||||
failure=f, level=tahoe_log.OPERATIONAL, umid="wO9UoQ")
|
||||
self.flushLoggedErrors(SampleError)
|
||||
|
||||
|
||||
class SimpleSpans:
|
||||
# this is a simple+inefficient form of util.spans.Spans . We compare the
|
||||
# behavior of this reference model against the real (efficient) form.
|
||||
|
||||
def __init__(self, _span_or_start=None, length=None):
|
||||
self._have = set()
|
||||
if length is not None:
|
||||
for i in range(_span_or_start, _span_or_start+length):
|
||||
self._have.add(i)
|
||||
elif _span_or_start:
|
||||
for (start,length) in _span_or_start:
|
||||
self.add(start, length)
|
||||
|
||||
def add(self, start, length):
|
||||
for i in range(start, start+length):
|
||||
self._have.add(i)
|
||||
return self
|
||||
|
||||
def remove(self, start, length):
|
||||
for i in range(start, start+length):
|
||||
self._have.discard(i)
|
||||
return self
|
||||
|
||||
def each(self):
|
||||
return sorted(self._have)
|
||||
|
||||
def __iter__(self):
|
||||
items = sorted(self._have)
|
||||
prevstart = None
|
||||
prevend = None
|
||||
for i in items:
|
||||
if prevstart is None:
|
||||
prevstart = prevend = i
|
||||
continue
|
||||
if i == prevend+1:
|
||||
prevend = i
|
||||
continue
|
||||
yield (prevstart, prevend-prevstart+1)
|
||||
prevstart = prevend = i
|
||||
if prevstart is not None:
|
||||
yield (prevstart, prevend-prevstart+1)
|
||||
|
||||
def __len__(self):
|
||||
# this also gets us bool(s)
|
||||
return len(self._have)
|
||||
|
||||
def __add__(self, other):
|
||||
s = self.__class__(self)
|
||||
for (start, length) in other:
|
||||
s.add(start, length)
|
||||
return s
|
||||
|
||||
def __sub__(self, other):
|
||||
s = self.__class__(self)
|
||||
for (start, length) in other:
|
||||
s.remove(start, length)
|
||||
return s
|
||||
|
||||
def __iadd__(self, other):
|
||||
for (start, length) in other:
|
||||
self.add(start, length)
|
||||
return self
|
||||
|
||||
def __isub__(self, other):
|
||||
for (start, length) in other:
|
||||
self.remove(start, length)
|
||||
return self
|
||||
|
||||
def __and__(self, other):
|
||||
s = self.__class__()
|
||||
for i in other.each():
|
||||
if i in self._have:
|
||||
s.add(i, 1)
|
||||
return s
|
||||
|
||||
def __contains__(self, (start,length)):
|
||||
for i in range(start, start+length):
|
||||
if i not in self._have:
|
||||
return False
|
||||
return True
|
||||
|
||||
class ByteSpans(unittest.TestCase):
|
||||
def test_basic(self):
|
||||
s = Spans()
|
||||
self.failUnlessEqual(list(s), [])
|
||||
self.failIf(s)
|
||||
self.failIf((0,1) in s)
|
||||
self.failUnlessEqual(len(s), 0)
|
||||
|
||||
s1 = Spans(3, 4) # 3,4,5,6
|
||||
self._check1(s1)
|
||||
|
||||
s2 = Spans(s1)
|
||||
self._check1(s2)
|
||||
|
||||
s2.add(10,2) # 10,11
|
||||
self._check1(s1)
|
||||
self.failUnless((10,1) in s2)
|
||||
self.failIf((10,1) in s1)
|
||||
self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11])
|
||||
self.failUnlessEqual(len(s2), 6)
|
||||
|
||||
s2.add(15,2).add(20,2)
|
||||
self.failUnlessEqual(list(s2.each()), [3,4,5,6,10,11,15,16,20,21])
|
||||
self.failUnlessEqual(len(s2), 10)
|
||||
|
||||
s2.remove(4,3).remove(15,1)
|
||||
self.failUnlessEqual(list(s2.each()), [3,10,11,16,20,21])
|
||||
self.failUnlessEqual(len(s2), 6)
|
||||
|
||||
s1 = SimpleSpans(3, 4) # 3 4 5 6
|
||||
s2 = SimpleSpans(5, 4) # 5 6 7 8
|
||||
i = s1 & s2
|
||||
self.failUnlessEqual(list(i.each()), [5, 6])
|
||||
|
||||
def _check1(self, s):
|
||||
self.failUnlessEqual(list(s), [(3,4)])
|
||||
self.failUnless(s)
|
||||
self.failUnlessEqual(len(s), 4)
|
||||
self.failIf((0,1) in s)
|
||||
self.failUnless((3,4) in s)
|
||||
self.failUnless((3,1) in s)
|
||||
self.failUnless((5,2) in s)
|
||||
self.failUnless((6,1) in s)
|
||||
self.failIf((6,2) in s)
|
||||
self.failIf((7,1) in s)
|
||||
self.failUnlessEqual(list(s.each()), [3,4,5,6])
|
||||
|
||||
def test_math(self):
|
||||
s1 = Spans(0, 10) # 0,1,2,3,4,5,6,7,8,9
|
||||
s2 = Spans(5, 3) # 5,6,7
|
||||
s3 = Spans(8, 4) # 8,9,10,11
|
||||
|
||||
s = s1 - s2
|
||||
self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
|
||||
s = s1 - s3
|
||||
self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
|
||||
s = s2 - s3
|
||||
self.failUnlessEqual(list(s.each()), [5,6,7])
|
||||
s = s1 & s2
|
||||
self.failUnlessEqual(list(s.each()), [5,6,7])
|
||||
s = s2 & s1
|
||||
self.failUnlessEqual(list(s.each()), [5,6,7])
|
||||
s = s1 & s3
|
||||
self.failUnlessEqual(list(s.each()), [8,9])
|
||||
s = s3 & s1
|
||||
self.failUnlessEqual(list(s.each()), [8,9])
|
||||
s = s2 & s3
|
||||
self.failUnlessEqual(list(s.each()), [])
|
||||
s = s3 & s2
|
||||
self.failUnlessEqual(list(s.each()), [])
|
||||
s = Spans() & s3
|
||||
self.failUnlessEqual(list(s.each()), [])
|
||||
s = s3 & Spans()
|
||||
self.failUnlessEqual(list(s.each()), [])
|
||||
|
||||
s = s1 + s2
|
||||
self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
|
||||
s = s1 + s3
|
||||
self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
|
||||
s = s2 + s3
|
||||
self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
|
||||
|
||||
s = Spans(s1)
|
||||
s -= s2
|
||||
self.failUnlessEqual(list(s.each()), [0,1,2,3,4,8,9])
|
||||
s = Spans(s1)
|
||||
s -= s3
|
||||
self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7])
|
||||
s = Spans(s2)
|
||||
s -= s3
|
||||
self.failUnlessEqual(list(s.each()), [5,6,7])
|
||||
|
||||
s = Spans(s1)
|
||||
s += s2
|
||||
self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9])
|
||||
s = Spans(s1)
|
||||
s += s3
|
||||
self.failUnlessEqual(list(s.each()), [0,1,2,3,4,5,6,7,8,9,10,11])
|
||||
s = Spans(s2)
|
||||
s += s3
|
||||
self.failUnlessEqual(list(s.each()), [5,6,7,8,9,10,11])
|
||||
|
||||
def test_random(self):
|
||||
# attempt to increase coverage of corner cases by comparing behavior
|
||||
# of a simple-but-slow model implementation against the
|
||||
# complex-but-fast actual implementation, in a large number of random
|
||||
# operations
|
||||
S1 = SimpleSpans
|
||||
S2 = Spans
|
||||
s1 = S1(); s2 = S2()
|
||||
seed = ""
|
||||
def _create(subseed):
|
||||
ns1 = S1(); ns2 = S2()
|
||||
for i in range(10):
|
||||
what = md5(subseed+str(i)).hexdigest()
|
||||
start = int(what[2:4], 16)
|
||||
length = max(1,int(what[5:6], 16))
|
||||
ns1.add(start, length); ns2.add(start, length)
|
||||
return ns1, ns2
|
||||
|
||||
#print
|
||||
for i in range(1000):
|
||||
what = md5(seed+str(i)).hexdigest()
|
||||
op = what[0]
|
||||
subop = what[1]
|
||||
start = int(what[2:4], 16)
|
||||
length = max(1,int(what[5:6], 16))
|
||||
#print what
|
||||
if op in "0":
|
||||
if subop in "01234":
|
||||
s1 = S1(); s2 = S2()
|
||||
elif subop in "5678":
|
||||
s1 = S1(start, length); s2 = S2(start, length)
|
||||
else:
|
||||
s1 = S1(s1); s2 = S2(s2)
|
||||
#print "s2 = %s" % s2.dump()
|
||||
elif op in "123":
|
||||
#print "s2.add(%d,%d)" % (start, length)
|
||||
s1.add(start, length); s2.add(start, length)
|
||||
elif op in "456":
|
||||
#print "s2.remove(%d,%d)" % (start, length)
|
||||
s1.remove(start, length); s2.remove(start, length)
|
||||
elif op in "78":
|
||||
ns1, ns2 = _create(what[7:11])
|
||||
#print "s2 + %s" % ns2.dump()
|
||||
s1 = s1 + ns1; s2 = s2 + ns2
|
||||
elif op in "9a":
|
||||
ns1, ns2 = _create(what[7:11])
|
||||
#print "%s - %s" % (s2.dump(), ns2.dump())
|
||||
s1 = s1 - ns1; s2 = s2 - ns2
|
||||
elif op in "bc":
|
||||
ns1, ns2 = _create(what[7:11])
|
||||
#print "s2 += %s" % ns2.dump()
|
||||
s1 += ns1; s2 += ns2
|
||||
elif op in "de":
|
||||
ns1, ns2 = _create(what[7:11])
|
||||
#print "%s -= %s" % (s2.dump(), ns2.dump())
|
||||
s1 -= ns1; s2 -= ns2
|
||||
else:
|
||||
ns1, ns2 = _create(what[7:11])
|
||||
#print "%s &= %s" % (s2.dump(), ns2.dump())
|
||||
s1 = s1 & ns1; s2 = s2 & ns2
|
||||
#print "s2 now %s" % s2.dump()
|
||||
self.failUnlessEqual(list(s1.each()), list(s2.each()))
|
||||
self.failUnlessEqual(len(s1), len(s2))
|
||||
self.failUnlessEqual(bool(s1), bool(s2))
|
||||
self.failUnlessEqual(list(s1), list(s2))
|
||||
for j in range(10):
|
||||
what = md5(what[12:14]+str(j)).hexdigest()
|
||||
start = int(what[2:4], 16)
|
||||
length = max(1, int(what[5:6], 16))
|
||||
span = (start, length)
|
||||
self.failUnlessEqual(bool(span in s1), bool(span in s2))
|
||||
|
||||
|
||||
# s()
|
||||
# s(start,length)
|
||||
# s(s0)
|
||||
# s.add(start,length) : returns s
|
||||
# s.remove(start,length)
|
||||
# s.each() -> list of byte offsets, mostly for testing
|
||||
# list(s) -> list of (start,length) tuples, one per span
|
||||
# (start,length) in s -> True if (start..start+length-1) are all members
|
||||
# NOT equivalent to x in list(s)
|
||||
# len(s) -> number of bytes, for testing, bool(), and accounting/limiting
|
||||
# bool(s) (__len__)
|
||||
# s = s1+s2, s1-s2, +=s1, -=s1
|
||||
|
||||
def test_overlap(self):
|
||||
for a in range(20):
|
||||
for b in range(10):
|
||||
for c in range(20):
|
||||
for d in range(10):
|
||||
self._test_overlap(a,b,c,d)
|
||||
|
||||
def _test_overlap(self, a, b, c, d):
|
||||
s1 = set(range(a,a+b))
|
||||
s2 = set(range(c,c+d))
|
||||
#print "---"
|
||||
#self._show_overlap(s1, "1")
|
||||
#self._show_overlap(s2, "2")
|
||||
o = overlap(a,b,c,d)
|
||||
expected = s1.intersection(s2)
|
||||
if not expected:
|
||||
self.failUnlessEqual(o, None)
|
||||
else:
|
||||
start,length = o
|
||||
so = set(range(start,start+length))
|
||||
#self._show(so, "o")
|
||||
self.failUnlessEqual(so, expected)
|
||||
|
||||
def _show_overlap(self, s, c):
|
||||
import sys
|
||||
out = sys.stdout
|
||||
if s:
|
||||
for i in range(max(s)):
|
||||
if i in s:
|
||||
out.write(c)
|
||||
else:
|
||||
out.write(" ")
|
||||
out.write("\n")
|
||||
|
||||
def extend(s, start, length, fill):
|
||||
if len(s) >= start+length:
|
||||
return s
|
||||
assert len(fill) == 1
|
||||
return s + fill*(start+length-len(s))
|
||||
|
||||
def replace(s, start, data):
|
||||
assert len(s) >= start+len(data)
|
||||
return s[:start] + data + s[start+len(data):]
|
||||
|
||||
class SimpleDataSpans:
|
||||
def __init__(self, other=None):
|
||||
self.missing = "" # "1" where missing, "0" where found
|
||||
self.data = ""
|
||||
if other:
|
||||
for (start, data) in other.get_chunks():
|
||||
self.add(start, data)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.missing.translate(None, "1"))
|
||||
def _dump(self):
|
||||
return [i for (i,c) in enumerate(self.missing) if c == "0"]
|
||||
def _have(self, start, length):
|
||||
m = self.missing[start:start+length]
|
||||
if not m or len(m)<length or int(m):
|
||||
return False
|
||||
return True
|
||||
def get_chunks(self):
|
||||
for i in self._dump():
|
||||
yield (i, self.data[i])
|
||||
def get_spans(self):
|
||||
return SimpleSpans([(start,len(data))
|
||||
for (start,data) in self.get_chunks()])
|
||||
def get(self, start, length):
|
||||
if self._have(start, length):
|
||||
return self.data[start:start+length]
|
||||
return None
|
||||
def pop(self, start, length):
|
||||
data = self.get(start, length)
|
||||
if data:
|
||||
self.remove(start, length)
|
||||
return data
|
||||
def remove(self, start, length):
|
||||
self.missing = replace(extend(self.missing, start, length, "1"),
|
||||
start, "1"*length)
|
||||
def add(self, start, data):
|
||||
self.missing = replace(extend(self.missing, start, len(data), "1"),
|
||||
start, "0"*len(data))
|
||||
self.data = replace(extend(self.data, start, len(data), " "),
|
||||
start, data)
|
||||
|
||||
|
||||
class StringSpans(unittest.TestCase):
|
||||
def do_basic(self, klass):
|
||||
ds = klass()
|
||||
self.failUnlessEqual(len(ds), 0)
|
||||
self.failUnlessEqual(list(ds._dump()), [])
|
||||
self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 0)
|
||||
s = ds.get_spans()
|
||||
self.failUnlessEqual(ds.get(0, 4), None)
|
||||
self.failUnlessEqual(ds.pop(0, 4), None)
|
||||
ds.remove(0, 4)
|
||||
|
||||
ds.add(2, "four")
|
||||
self.failUnlessEqual(len(ds), 4)
|
||||
self.failUnlessEqual(list(ds._dump()), [2,3,4,5])
|
||||
self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
|
||||
s = ds.get_spans()
|
||||
self.failUnless((2,2) in s)
|
||||
self.failUnlessEqual(ds.get(0, 4), None)
|
||||
self.failUnlessEqual(ds.pop(0, 4), None)
|
||||
self.failUnlessEqual(ds.get(4, 4), None)
|
||||
|
||||
ds2 = klass(ds)
|
||||
self.failUnlessEqual(len(ds2), 4)
|
||||
self.failUnlessEqual(list(ds2._dump()), [2,3,4,5])
|
||||
self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 4)
|
||||
self.failUnlessEqual(ds2.get(0, 4), None)
|
||||
self.failUnlessEqual(ds2.pop(0, 4), None)
|
||||
self.failUnlessEqual(ds2.pop(2, 3), "fou")
|
||||
self.failUnlessEqual(sum([len(d) for (s,d) in ds2.get_chunks()]), 1)
|
||||
self.failUnlessEqual(ds2.get(2, 3), None)
|
||||
self.failUnlessEqual(ds2.get(5, 1), "r")
|
||||
self.failUnlessEqual(ds.get(2, 3), "fou")
|
||||
self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 4)
|
||||
|
||||
ds.add(0, "23")
|
||||
self.failUnlessEqual(len(ds), 6)
|
||||
self.failUnlessEqual(list(ds._dump()), [0,1,2,3,4,5])
|
||||
self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 6)
|
||||
self.failUnlessEqual(ds.get(0, 4), "23fo")
|
||||
self.failUnlessEqual(ds.pop(0, 4), "23fo")
|
||||
self.failUnlessEqual(sum([len(d) for (s,d) in ds.get_chunks()]), 2)
|
||||
self.failUnlessEqual(ds.get(0, 4), None)
|
||||
self.failUnlessEqual(ds.pop(0, 4), None)
|
||||
|
||||
ds = klass()
|
||||
ds.add(2, "four")
|
||||
ds.add(3, "ea")
|
||||
self.failUnlessEqual(ds.get(2, 4), "fear")
|
||||
|
||||
def do_scan(self, klass):
|
||||
# do a test with gaps and spans of size 1 and 2
|
||||
# left=(1,11) * right=(1,11) * gapsize=(1,2)
|
||||
# 111, 112, 121, 122, 211, 212, 221, 222
|
||||
# 211
|
||||
# 121
|
||||
# 112
|
||||
# 212
|
||||
# 222
|
||||
# 221
|
||||
# 111
|
||||
# 122
|
||||
# 11 1 1 11 11 11 1 1 111
|
||||
# 0123456789012345678901234567
|
||||
# abcdefghijklmnopqrstuvwxyz-=
|
||||
pieces = [(1, "bc"),
|
||||
(4, "e"),
|
||||
(7, "h"),
|
||||
(9, "jk"),
|
||||
(12, "mn"),
|
||||
(16, "qr"),
|
||||
(20, "u"),
|
||||
(22, "w"),
|
||||
(25, "z-="),
|
||||
]
|
||||
p_elements = set([1,2,4,7,9,10,12,13,16,17,20,22,25,26,27])
|
||||
S = "abcdefghijklmnopqrstuvwxyz-="
|
||||
# TODO: when adding data, add capital letters, to make sure we aren't
|
||||
# just leaving the old data in place
|
||||
l = len(S)
|
||||
def base():
|
||||
ds = klass()
|
||||
for start, data in pieces:
|
||||
ds.add(start, data)
|
||||
return ds
|
||||
def dump(s):
|
||||
p = set(s._dump())
|
||||
# wow, this is the first time I've ever wanted ?: in python
|
||||
# note: this requires python2.5
|
||||
d = "".join([(S[i] if i in p else " ") for i in range(l)])
|
||||
assert len(d) == l
|
||||
return d
|
||||
DEBUG = False
|
||||
for start in range(0, l):
|
||||
for end in range(start+1, l):
|
||||
# add [start-end) to the baseline
|
||||
which = "%d-%d" % (start, end-1)
|
||||
p_added = set(range(start, end))
|
||||
b = base()
|
||||
if DEBUG:
|
||||
print
|
||||
print dump(b), which
|
||||
add = klass(); add.add(start, S[start:end])
|
||||
print dump(add)
|
||||
b.add(start, S[start:end])
|
||||
if DEBUG:
|
||||
print dump(b)
|
||||
# check that the new span is there
|
||||
d = b.get(start, end-start)
|
||||
self.failUnlessEqual(d, S[start:end], which)
|
||||
# check that all the original pieces are still there
|
||||
for t_start, t_data in pieces:
|
||||
t_len = len(t_data)
|
||||
self.failUnlessEqual(b.get(t_start, t_len),
|
||||
S[t_start:t_start+t_len],
|
||||
"%s %d+%d" % (which, t_start, t_len))
|
||||
# check that a lot of subspans are mostly correct
|
||||
for t_start in range(l):
|
||||
for t_len in range(1,4):
|
||||
d = b.get(t_start, t_len)
|
||||
if d is not None:
|
||||
which2 = "%s+(%d-%d)" % (which, t_start,
|
||||
t_start+t_len-1)
|
||||
self.failUnlessEqual(d, S[t_start:t_start+t_len],
|
||||
which2)
|
||||
# check that removing a subspan gives the right value
|
||||
b2 = klass(b)
|
||||
b2.remove(t_start, t_len)
|
||||
removed = set(range(t_start, t_start+t_len))
|
||||
for i in range(l):
|
||||
exp = (((i in p_elements) or (i in p_added))
|
||||
and (i not in removed))
|
||||
which2 = "%s-(%d-%d)" % (which, t_start,
|
||||
t_start+t_len-1)
|
||||
self.failUnlessEqual(bool(b2.get(i, 1)), exp,
|
||||
which2+" %d" % i)
|
||||
|
||||
def test_test(self):
|
||||
self.do_basic(SimpleDataSpans)
|
||||
self.do_scan(SimpleDataSpans)
|
||||
|
||||
def test_basic(self):
|
||||
self.do_basic(DataSpans)
|
||||
self.do_scan(DataSpans)
|
||||
|
||||
def test_random(self):
|
||||
# attempt to increase coverage of corner cases by comparing behavior
|
||||
# of a simple-but-slow model implementation against the
|
||||
# complex-but-fast actual implementation, in a large number of random
|
||||
# operations
|
||||
S1 = SimpleDataSpans
|
||||
S2 = DataSpans
|
||||
s1 = S1(); s2 = S2()
|
||||
seed = ""
|
||||
def _randstr(length, seed):
|
||||
created = 0
|
||||
pieces = []
|
||||
while created < length:
|
||||
piece = md5(seed + str(created)).hexdigest()
|
||||
pieces.append(piece)
|
||||
created += len(piece)
|
||||
return "".join(pieces)[:length]
|
||||
def _create(subseed):
|
||||
ns1 = S1(); ns2 = S2()
|
||||
for i in range(10):
|
||||
what = md5(subseed+str(i)).hexdigest()
|
||||
start = int(what[2:4], 16)
|
||||
length = max(1,int(what[5:6], 16))
|
||||
ns1.add(start, _randstr(length, what[7:9]));
|
||||
ns2.add(start, _randstr(length, what[7:9]))
|
||||
return ns1, ns2
|
||||
|
||||
#print
|
||||
for i in range(1000):
|
||||
what = md5(seed+str(i)).hexdigest()
|
||||
op = what[0]
|
||||
subop = what[1]
|
||||
start = int(what[2:4], 16)
|
||||
length = max(1,int(what[5:6], 16))
|
||||
#print what
|
||||
if op in "0":
|
||||
if subop in "0123456":
|
||||
s1 = S1(); s2 = S2()
|
||||
else:
|
||||
s1, s2 = _create(what[7:11])
|
||||
#print "s2 = %s" % list(s2._dump())
|
||||
elif op in "123456":
|
||||
#print "s2.add(%d,%d)" % (start, length)
|
||||
s1.add(start, _randstr(length, what[7:9]));
|
||||
s2.add(start, _randstr(length, what[7:9]))
|
||||
elif op in "789abc":
|
||||
#print "s2.remove(%d,%d)" % (start, length)
|
||||
s1.remove(start, length); s2.remove(start, length)
|
||||
else:
|
||||
#print "s2.pop(%d,%d)" % (start, length)
|
||||
d1 = s1.pop(start, length); d2 = s2.pop(start, length)
|
||||
self.failUnlessEqual(d1, d2)
|
||||
#print "s1 now %s" % list(s1._dump())
|
||||
#print "s2 now %s" % list(s2._dump())
|
||||
self.failUnlessEqual(len(s1), len(s2))
|
||||
self.failUnlessEqual(list(s1._dump()), list(s2._dump()))
|
||||
for j in range(100):
|
||||
what = md5(what[12:14]+str(j)).hexdigest()
|
||||
start = int(what[2:4], 16)
|
||||
length = max(1, int(what[5:6], 16))
|
||||
d1 = s1.get(start, length); d2 = s2.get(start, length)
|
||||
self.failUnlessEqual(d1, d2, "%d+%d" % (start, length))
|
||||
|
431
src/allmydata/util/spans.py
Executable file
431
src/allmydata/util/spans.py
Executable file
@ -0,0 +1,431 @@
|
||||
|
||||
class Spans:
|
||||
"""I represent a compressed list of booleans, one per index (an integer).
|
||||
Typically, each index represents an offset into a large string, pointing
|
||||
to a specific byte of a share. In this context, True means that byte has
|
||||
been received, or has been requested.
|
||||
|
||||
Another way to look at this is maintaining a set of integers, optimized
|
||||
for operations on spans like 'add range to set' and 'is range in set?'.
|
||||
|
||||
This is a python equivalent of perl's Set::IntSpan module, frequently
|
||||
used to represent .newsrc contents.
|
||||
|
||||
Rather than storing an actual (large) list or dictionary, I represent my
|
||||
internal state as a sorted list of spans, each with a start and a length.
|
||||
My API is presented in terms of start+length pairs. I provide set
|
||||
arithmetic operators, to efficiently answer questions like 'I want bytes
|
||||
XYZ, I already requested bytes ABC, and I've already received bytes DEF:
|
||||
what bytes should I request now?'.
|
||||
|
||||
The new downloader will use it to keep track of which bytes we've requested
|
||||
or received already.
|
||||
"""
|
||||
|
||||
def __init__(self, _span_or_start=None, length=None):
|
||||
self._spans = list()
|
||||
if length is not None:
|
||||
self._spans.append( (_span_or_start, length) )
|
||||
elif _span_or_start:
|
||||
for (start,length) in _span_or_start:
|
||||
self.add(start, length)
|
||||
self._check()
|
||||
|
||||
def _check(self):
|
||||
assert sorted(self._spans) == self._spans
|
||||
prev_end = None
|
||||
try:
|
||||
for (start,length) in self._spans:
|
||||
if prev_end is not None:
|
||||
assert start > prev_end
|
||||
prev_end = start+length
|
||||
except AssertionError:
|
||||
print "BAD:", self.dump()
|
||||
raise
|
||||
|
||||
def add(self, start, length):
|
||||
assert start >= 0
|
||||
assert length > 0
|
||||
#print " ADD [%d+%d -%d) to %s" % (start, length, start+length, self.dump())
|
||||
first_overlap = last_overlap = None
|
||||
for i,(s_start,s_length) in enumerate(self._spans):
|
||||
#print " (%d+%d)-> overlap=%s adjacent=%s" % (s_start,s_length, overlap(s_start, s_length, start, length), adjacent(s_start, s_length, start, length))
|
||||
if (overlap(s_start, s_length, start, length)
|
||||
or adjacent(s_start, s_length, start, length)):
|
||||
last_overlap = i
|
||||
if first_overlap is None:
|
||||
first_overlap = i
|
||||
continue
|
||||
# no overlap
|
||||
if first_overlap is not None:
|
||||
break
|
||||
#print " first_overlap", first_overlap, last_overlap
|
||||
if first_overlap is None:
|
||||
# no overlap, so just insert the span and sort by starting
|
||||
# position.
|
||||
self._spans.insert(0, (start,length))
|
||||
self._spans.sort()
|
||||
else:
|
||||
# everything from [first_overlap] to [last_overlap] overlapped
|
||||
first_start,first_length = self._spans[first_overlap]
|
||||
last_start,last_length = self._spans[last_overlap]
|
||||
newspan_start = min(start, first_start)
|
||||
newspan_end = max(start+length, last_start+last_length)
|
||||
newspan_length = newspan_end - newspan_start
|
||||
newspan = (newspan_start, newspan_length)
|
||||
self._spans[first_overlap:last_overlap+1] = [newspan]
|
||||
#print " ADD done: %s" % self.dump()
|
||||
self._check()
|
||||
|
||||
return self
|
||||
|
||||
def remove(self, start, length):
|
||||
assert start >= 0
|
||||
assert length > 0
|
||||
#print " REMOVE [%d+%d -%d) from %s" % (start, length, start+length, self.dump())
|
||||
first_complete_overlap = last_complete_overlap = None
|
||||
for i,(s_start,s_length) in enumerate(self._spans):
|
||||
s_end = s_start + s_length
|
||||
o = overlap(s_start, s_length, start, length)
|
||||
if o:
|
||||
o_start, o_length = o
|
||||
o_end = o_start+o_length
|
||||
if o_start == s_start and o_end == s_end:
|
||||
# delete this span altogether
|
||||
if first_complete_overlap is None:
|
||||
first_complete_overlap = i
|
||||
last_complete_overlap = i
|
||||
elif o_start == s_start:
|
||||
# we only overlap the left side, so trim the start
|
||||
# 1111
|
||||
# rrrr
|
||||
# oo
|
||||
# -> 11
|
||||
new_start = o_end
|
||||
new_end = s_end
|
||||
assert new_start > s_start
|
||||
new_length = new_end - new_start
|
||||
self._spans[i] = (new_start, new_length)
|
||||
elif o_end == s_end:
|
||||
# we only overlap the right side
|
||||
# 1111
|
||||
# rrrr
|
||||
# oo
|
||||
# -> 11
|
||||
new_start = s_start
|
||||
new_end = o_start
|
||||
assert new_end < s_end
|
||||
new_length = new_end - new_start
|
||||
self._spans[i] = (new_start, new_length)
|
||||
else:
|
||||
# we overlap the middle, so create a new span. No need to
|
||||
# examine any other spans.
|
||||
# 111111
|
||||
# rr
|
||||
# LL RR
|
||||
left_start = s_start
|
||||
left_end = o_start
|
||||
left_length = left_end - left_start
|
||||
right_start = o_end
|
||||
right_end = s_end
|
||||
right_length = right_end - right_start
|
||||
self._spans[i] = (left_start, left_length)
|
||||
self._spans.append( (right_start, right_length) )
|
||||
self._spans.sort()
|
||||
break
|
||||
if first_complete_overlap is not None:
|
||||
del self._spans[first_complete_overlap:last_complete_overlap+1]
|
||||
#print " REMOVE done: %s" % self.dump()
|
||||
self._check()
|
||||
return self
|
||||
|
||||
def dump(self):
|
||||
return "len=%d: %s" % (len(self),
|
||||
",".join(["[%d-%d]" % (start,start+l-1)
|
||||
for (start,l) in self._spans]) )
|
||||
|
||||
def each(self):
|
||||
for start, length in self._spans:
|
||||
for i in range(start, start+length):
|
||||
yield i
|
||||
|
||||
def __iter__(self):
|
||||
for s in self._spans:
|
||||
yield s
|
||||
|
||||
def __len__(self):
|
||||
# this also gets us bool(s)
|
||||
return sum([length for start,length in self._spans])
|
||||
|
||||
def __add__(self, other):
|
||||
s = self.__class__(self)
|
||||
for (start, length) in other:
|
||||
s.add(start, length)
|
||||
return s
|
||||
|
||||
def __sub__(self, other):
|
||||
s = self.__class__(self)
|
||||
for (start, length) in other:
|
||||
s.remove(start, length)
|
||||
return s
|
||||
|
||||
def __iadd__(self, other):
|
||||
for (start, length) in other:
|
||||
self.add(start, length)
|
||||
return self
|
||||
|
||||
def __isub__(self, other):
|
||||
for (start, length) in other:
|
||||
self.remove(start, length)
|
||||
return self
|
||||
|
||||
def __and__(self, other):
|
||||
if not self._spans:
|
||||
return self.__class__()
|
||||
bounds = self.__class__(self._spans[0][0],
|
||||
self._spans[-1][0]+self._spans[-1][1])
|
||||
not_other = bounds - other
|
||||
return self - not_other
|
||||
|
||||
def __contains__(self, (start,length)):
|
||||
for span_start,span_length in self._spans:
|
||||
o = overlap(start, length, span_start, span_length)
|
||||
if o:
|
||||
o_start,o_length = o
|
||||
if o_start == start and o_length == length:
|
||||
return True
|
||||
return False
|
||||
|
||||
def overlap(start0, length0, start1, length1):
|
||||
# return start2,length2 of the overlapping region, or None
|
||||
# 00 00 000 0000 00 00 000 00 00 00 00
|
||||
# 11 11 11 11 111 11 11 1111 111 11 11
|
||||
left = max(start0, start1)
|
||||
right = min(start0+length0, start1+length1)
|
||||
# if there is overlap, 'left' will be its start, and right-1 will
|
||||
# be the end'
|
||||
if left < right:
|
||||
return (left, right-left)
|
||||
return None
|
||||
|
||||
def adjacent(start0, length0, start1, length1):
|
||||
if (start0 < start1) and start0+length0 == start1:
|
||||
return True
|
||||
elif (start1 < start0) and start1+length1 == start0:
|
||||
return True
|
||||
return False
|
||||
|
||||
class DataSpans:
|
||||
"""I represent portions of a large string. Equivalently, I can be said to
|
||||
maintain a large array of characters (with gaps of empty elements). I can
|
||||
be used to manage access to a remote share, where some pieces have been
|
||||
retrieved, some have been requested, and others have not been read.
|
||||
"""
|
||||
|
||||
def __init__(self, other=None):
|
||||
self.spans = [] # (start, data) tuples, non-overlapping, merged
|
||||
if other:
|
||||
for (start, data) in other.get_chunks():
|
||||
self.add(start, data)
|
||||
|
||||
def __len__(self):
|
||||
# return number of bytes we're holding
|
||||
return sum([len(data) for (start,data) in self.spans])
|
||||
|
||||
def _dump(self):
|
||||
# return iterator of sorted list of offsets, one per byte
|
||||
for (start,data) in self.spans:
|
||||
for i in range(start, start+len(data)):
|
||||
yield i
|
||||
|
||||
def dump(self):
|
||||
return "len=%d: %s" % (len(self),
|
||||
",".join(["[%d-%d]" % (start,start+len(data)-1)
|
||||
for (start,data) in self.spans]) )
|
||||
|
||||
def get_chunks(self):
|
||||
return list(self.spans)
|
||||
|
||||
def get_spans(self):
|
||||
"""Return a Spans object with a bit set for each byte I hold"""
|
||||
return Spans([(start, len(data)) for (start,data) in self.spans])
|
||||
|
||||
def assert_invariants(self):
|
||||
if not self.spans:
|
||||
return
|
||||
prev_start = self.spans[0][0]
|
||||
prev_end = prev_start + len(self.spans[0][1])
|
||||
for start, data in self.spans[1:]:
|
||||
if not start > prev_end:
|
||||
# adjacent or overlapping: bad
|
||||
print "ASSERTION FAILED", self.spans
|
||||
raise AssertionError
|
||||
|
||||
def get(self, start, length):
|
||||
# returns a string of LENGTH, or None
|
||||
#print "get", start, length, self.spans
|
||||
end = start+length
|
||||
for (s_start,s_data) in self.spans:
|
||||
s_end = s_start+len(s_data)
|
||||
#print " ",s_start,s_end
|
||||
if s_start <= start < s_end:
|
||||
# we want some data from this span. Because we maintain
|
||||
# strictly merged and non-overlapping spans, everything we
|
||||
# want must be in this span.
|
||||
offset = start - s_start
|
||||
if offset + length > len(s_data):
|
||||
#print " None, span falls short"
|
||||
return None # span falls short
|
||||
#print " some", s_data[offset:offset+length]
|
||||
return s_data[offset:offset+length]
|
||||
if s_start >= end:
|
||||
# we've gone too far: no further spans will overlap
|
||||
#print " None, gone too far"
|
||||
return None
|
||||
#print " None, ran out of spans"
|
||||
return None
|
||||
|
||||
def add(self, start, data):
|
||||
# first: walk through existing spans, find overlap, modify-in-place
|
||||
# create list of new spans
|
||||
# add new spans
|
||||
# sort
|
||||
# merge adjacent spans
|
||||
#print "add", start, data, self.spans
|
||||
end = start + len(data)
|
||||
i = 0
|
||||
while len(data):
|
||||
#print " loop", start, data, i, len(self.spans), self.spans
|
||||
if i >= len(self.spans):
|
||||
#print " append and done"
|
||||
# append a last span
|
||||
self.spans.append( (start, data) )
|
||||
break
|
||||
(s_start,s_data) = self.spans[i]
|
||||
# five basic cases:
|
||||
# a: OLD b:OLDD c1:OLD c2:OLD d1:OLDD d2:OLD e: OLLDD
|
||||
# NEW NEW NEW NEWW NEW NEW NEW
|
||||
#
|
||||
# we handle A by inserting a new segment (with "N") and looping,
|
||||
# turning it into B or C. We handle B by replacing a prefix and
|
||||
# terminating. We handle C (both c1 and c2) by replacing the
|
||||
# segment (and, for c2, looping, turning it into A). We handle D
|
||||
# by replacing a suffix (and, for d2, looping, turning it into
|
||||
# A). We handle E by replacing the middle and terminating.
|
||||
if start < s_start:
|
||||
# case A: insert a new span, then loop with the remainder
|
||||
#print " insert new psan"
|
||||
s_len = s_start-start
|
||||
self.spans.insert(i, (start, data[:s_len]))
|
||||
i += 1
|
||||
start = s_start
|
||||
data = data[s_len:]
|
||||
continue
|
||||
s_len = len(s_data)
|
||||
s_end = s_start+s_len
|
||||
if s_start <= start < s_end:
|
||||
#print " modify this span", s_start, start, s_end
|
||||
# we want to modify some data in this span: a prefix, a
|
||||
# suffix, or the whole thing
|
||||
if s_start == start:
|
||||
if s_end <= end:
|
||||
#print " replace whole segment"
|
||||
# case C: replace this segment
|
||||
self.spans[i] = (s_start, data[:s_len])
|
||||
i += 1
|
||||
start += s_len
|
||||
data = data[s_len:]
|
||||
# C2 is where len(data)>0
|
||||
continue
|
||||
# case B: modify the prefix, retain the suffix
|
||||
#print " modify prefix"
|
||||
self.spans[i] = (s_start, data + s_data[len(data):])
|
||||
break
|
||||
if start > s_start and end < s_end:
|
||||
# case E: modify the middle
|
||||
#print " modify middle"
|
||||
prefix_len = start - s_start # we retain this much
|
||||
suffix_len = s_end - end # and retain this much
|
||||
newdata = s_data[:prefix_len] + data + s_data[-suffix_len:]
|
||||
self.spans[i] = (s_start, newdata)
|
||||
break
|
||||
# case D: retain the prefix, modify the suffix
|
||||
#print " modify suffix"
|
||||
prefix_len = start - s_start # we retain this much
|
||||
suffix_len = s_len - prefix_len # we replace this much
|
||||
#print " ", s_data, prefix_len, suffix_len, s_len, data
|
||||
self.spans[i] = (s_start,
|
||||
s_data[:prefix_len] + data[:suffix_len])
|
||||
i += 1
|
||||
start += suffix_len
|
||||
data = data[suffix_len:]
|
||||
#print " now", start, data
|
||||
# D2 is where len(data)>0
|
||||
continue
|
||||
# else we're not there yet
|
||||
#print " still looking"
|
||||
i += 1
|
||||
continue
|
||||
# now merge adjacent spans
|
||||
#print " merging", self.spans
|
||||
newspans = []
|
||||
for (s_start,s_data) in self.spans:
|
||||
if newspans and adjacent(newspans[-1][0], len(newspans[-1][1]),
|
||||
s_start, len(s_data)):
|
||||
newspans[-1] = (newspans[-1][0], newspans[-1][1] + s_data)
|
||||
else:
|
||||
newspans.append( (s_start, s_data) )
|
||||
self.spans = newspans
|
||||
self.assert_invariants()
|
||||
#print " done", self.spans
|
||||
|
||||
def remove(self, start, length):
|
||||
i = 0
|
||||
end = start + length
|
||||
#print "remove", start, length, self.spans
|
||||
while i < len(self.spans):
|
||||
(s_start,s_data) = self.spans[i]
|
||||
if s_start >= end:
|
||||
# this segment is entirely right of the removed region, and
|
||||
# all further segments are even further right. We're done.
|
||||
break
|
||||
s_len = len(s_data)
|
||||
s_end = s_start + s_len
|
||||
o = overlap(start, length, s_start, s_len)
|
||||
if not o:
|
||||
i += 1
|
||||
continue
|
||||
o_start, o_len = o
|
||||
o_end = o_start + o_len
|
||||
if o_len == s_len:
|
||||
# remove the whole segment
|
||||
del self.spans[i]
|
||||
continue
|
||||
if o_start == s_start:
|
||||
# remove a prefix, leaving the suffix from o_end to s_end
|
||||
prefix_len = o_end - o_start
|
||||
self.spans[i] = (o_end, s_data[prefix_len:])
|
||||
i += 1
|
||||
continue
|
||||
elif o_end == s_end:
|
||||
# remove a suffix, leaving the prefix from s_start to o_start
|
||||
prefix_len = o_start - s_start
|
||||
self.spans[i] = (s_start, s_data[:prefix_len])
|
||||
i += 1
|
||||
continue
|
||||
# remove the middle, creating a new segment
|
||||
# left is s_start:o_start, right is o_end:s_end
|
||||
left_len = o_start - s_start
|
||||
left = s_data[:left_len]
|
||||
right_len = s_end - o_end
|
||||
right = s_data[-right_len:]
|
||||
self.spans[i] = (s_start, left)
|
||||
self.spans.insert(i+1, (o_end, right))
|
||||
break
|
||||
#print " done", self.spans
|
||||
|
||||
def pop(self, start, length):
|
||||
data = self.get(start, length)
|
||||
if data:
|
||||
self.remove(start, length)
|
||||
return data
|
Loading…
Reference in New Issue
Block a user