mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-01-20 03:36:25 +00:00
pyfec: add easyfec wrapper which takes a single string and splits it into input shares and pads, then passes it on to the inner fec object
This commit is contained in:
parent
f3831d979c
commit
3c5d50797f
28
pyfec/fec/easyfec.py
Normal file
28
pyfec/fec/easyfec.py
Normal file
@ -0,0 +1,28 @@
|
||||
import fec
|
||||
|
||||
# div_ceil() was copied from the pyutil library.
|
||||
def div_ceil(n, d):
    """
    The smallest integer k such that k*d >= n (for positive d).

    @param n: the integer to be divided
    @param d: the integer to divide by
    @return: n divided by d, rounded up to a whole number
    @raise ZeroDivisionError: if d is 0
    """
    # Floor division keeps the quotient an integer even where "/" means
    # true division; the boolean adds 1 whenever there is a remainder.
    return (n // d) + (n % d != 0)
|
||||
|
||||
|
||||
class Encoder(object):
    """
    Wrapper around fec.Encoder which takes a single string, splits it into
    input shares (zero-padding as needed), and passes them on to the inner
    fec object.
    """
    def __init__(self, k, m):
        """
        @param k: first argument handed to fec.Encoder
        @param m: second argument handed to fec.Encoder
        """
        self.fec = fec.Encoder(k, m)

    def encode(self, data):
        """
        Split data into exactly self.fec.k chunks of equal length and encode
        them with the inner fec object.

        Each chunk is ceil(len(data) / k) characters long. Chunks that fall
        partly or wholly past the end of data are filled out with '\\x00'.

        @param data: string
        @return: the result of self.fec.encode() on the list of k chunks
        """
        k = self.fec.k
        # Ceiling division, done inline with "//" so the chunk size is always
        # an integer usable as a slice bound.
        chunksize = (len(data) + k - 1) // k
        chunks = []
        for i in range(k):
            # Step by whole chunks so consecutive chunks do not overlap.
            chunk = data[i*chunksize:(i+1)*chunksize]
            # Short input can leave the trailing chunks short or empty; pad
            # every one of them so all k chunks have the same length.
            chunks.append(chunk + '\x00' * (chunksize - len(chunk)))
        return self.fec.encode(chunks)

    def decode(self, shares):
        """
        Pass shares straight through to the inner fec object's decode().
        """
        # NOTE(review): self.fec is an fec.Encoder; confirm that it actually
        # provides decode().
        return self.fec.decode(shares)
|
||||
|
@ -23,10 +23,34 @@
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
import fec
|
||||
import easyfec, fec
|
||||
|
||||
import array, random
|
||||
|
||||
def encode_to_files_easyfec(inf, prefix, k, m):
    """
    Encode inf, writing the shares to files named $prefix+$shareid.

    The encoding is done by encode_file_stringy_easyfec() with a chunksize
    of 4096.

    @param inf the file object from which to read the data
    @param prefix the string prepended to each share id to form a file name
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created
    """
    outfiles = []
    try:
        # Opened one at a time inside the try block so that a failure part
        # way through still closes the files already opened.
        for shareid in range(m):
            outfiles.append(open(prefix+str(shareid), "wb"))

        def cb(shares, length):
            assert len(shares) == len(outfiles)
            for outf, share in zip(outfiles, shares):
                outf.write(share)

        encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096)
    finally:
        # Close explicitly so the share data is flushed to disk and the
        # handles are released even if encoding raises.
        for outf in outfiles:
            outf.close()
|
||||
|
||||
def encode_to_files_stringy(inf, prefix, k, m):
    """
    Encode inf, writing the shares to files named $prefix+$shareid.

    The encoding is done by encode_file_stringy() with a chunksize of 4096.

    @param inf the file object from which to read the data
    @param prefix the string prepended to each share id to form a file name
    @param k passed through to encode_file_stringy()
    @param m passed through to encode_file_stringy(); also the number of
        share files opened
    """
    outfiles = []
    try:
        # Opened one at a time inside the try block so that a failure part
        # way through still closes the files already opened.
        for shareid in range(m):
            outfiles.append(open(prefix+str(shareid), "wb"))

        def cb(shares, length):
            assert len(shares) == len(outfiles)
            for outf, share in zip(outfiles, shares):
                outf.write(share)

        encode_file_stringy(inf, cb, k, m, chunksize=4096)
    finally:
        # Close explicitly so the share data is flushed to disk and the
        # handles are released even if encoding raises.
        for outf in outfiles:
            outf.close()
|
||||
|
||||
def encode_to_files(inf, prefix, k, m):
|
||||
"""
|
||||
Encode inf, writing the shares to named $prefix+$shareid.
|
||||
@ -214,3 +238,34 @@ def encode_file_not_really(inf, cb, k, m, chunksize=4096):
|
||||
# res = enc.encode(l)
|
||||
# print "...finished to encode()"
|
||||
cb(l, indatasize)
|
||||
|
||||
def encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096):
    """
    Read in the contents of inf, encode, and call cb with the results.

    First, chunksize*k bytes will be read from inf, then encoded into m
    "result shares".  Then cb will be invoked, passing a list of the m result
    shares as its first argument, and the length of the encoded data as its
    second argument.  (The length of the encoded data is always equal to
    k*chunksize, until the last iteration, when the end of the file has been
    reached and less than k*chunksize bytes could be read from the file.)
    This procedure is iterated until the end of the file is reached.  Splitting
    each piece of data into input shares, and any padding that requires, is
    left to easyfec.Encoder.

    @param inf the file object from which to read the data
    @param cb the callback to be invoked with the results
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created
    @param chunksize how much data to read from inf for each of the k input
        shares
    """
    enc = easyfec.Encoder(k, m)

    readsize = k*chunksize
    indata = inf.read(readsize)
    while indata:
        res = enc.encode(indata)
        # Report how much was really read, so that the final, short read is
        # reported with its true length rather than k*chunksize.
        cb(res, len(indata))
        indata = inf.read(readsize)
|
||||
|
||||
|
@ -27,14 +27,22 @@ import fec
|
||||
|
||||
import array, random
|
||||
|
||||
def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
|
||||
FILESIZE=1000000
|
||||
def f_easyfec(filesize):
    """
    Run bench_encode_to_files_shuffle_decode_from_files() non-verbosely for
    the given filesize, encoding with fec.filefec.encode_to_files_easyfec.

    @param filesize: forwarded as the benchmark's filesize argument
    @return: whatever the benchmark function returns
    """
    encoder = fec.filefec.encode_to_files_easyfec
    return bench_encode_to_files_shuffle_decode_from_files(filesize, verbose=False, encodefunc=encoder)
|
||||
|
||||
def f_fec_stringy(filesize):
    """
    Run bench_encode_to_files_shuffle_decode_from_files() non-verbosely for
    the given filesize, encoding with fec.filefec.encode_to_files_stringy.

    @param filesize: forwarded as the benchmark's filesize argument
    @return: whatever the benchmark function returns
    """
    encoder = fec.filefec.encode_to_files_stringy
    return bench_encode_to_files_shuffle_decode_from_files(filesize, verbose=False, encodefunc=encoder)
|
||||
|
||||
def f_fec(filesize):
    """
    Run bench_encode_to_files_shuffle_decode_from_files() non-verbosely for
    the given filesize, encoding with fec.filefec.encode_to_files.

    @param filesize: forwarded as the benchmark's filesize argument
    @return: whatever the benchmark function returns
    """
    encoder = fec.filefec.encode_to_files
    return bench_encode_to_files_shuffle_decode_from_files(filesize, verbose=False, encodefunc=encoder)
|
||||
|
||||
def bench_encode_to_files_shuffle_decode_from_files(filesize=1000000, verbose=False, encodefunc=fec.filefec.encode_to_files):
|
||||
CHUNKSIZE=4096
|
||||
PREFIX="testshare"
|
||||
K=25
|
||||
M=100
|
||||
import os, time
|
||||
left=FILESIZE
|
||||
left=filesize
|
||||
outfile = open("tmpranddata", "wb")
|
||||
try:
|
||||
while left:
|
||||
@ -45,10 +53,10 @@ def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
|
||||
outfile = None
|
||||
infile = open("tmpranddata", "rb")
|
||||
st = time.time()
|
||||
fec.filefec.encode_to_files(infile, PREFIX, K, M)
|
||||
encodefunc(infile, PREFIX, K, M)
|
||||
so = time.time()
|
||||
if verbose:
|
||||
print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, M, so-st, FILESIZE/((so-st)*1000000),)
|
||||
print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, M, so-st, filesize/((so-st)*filesize),)
|
||||
enctime = so-st
|
||||
# Now delete m-k of the tempfiles at random.
|
||||
tempfs = [ f for f in os.listdir(".") if f.startswith(PREFIX) ]
|
||||
@ -57,10 +65,10 @@ def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
|
||||
os.remove(victimtempf)
|
||||
recoveredfile = open("tmpranddata-recovered", "wb")
|
||||
st = time.time()
|
||||
fec.filefec.decode_from_files(recoveredfile, 1000000, PREFIX, K, M)
|
||||
fec.filefec.decode_from_files(recoveredfile, filesize, PREFIX, K, M)
|
||||
so = time.time()
|
||||
if verbose:
|
||||
print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, K, so-st, FILESIZE/((so-st)*1000000),)
|
||||
print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, K, so-st, filesize/((so-st)*filesize),)
|
||||
return enctime + (so-st)
|
||||
finally:
|
||||
# os.remove("tmpranddata")
|
||||
|
Loading…
Reference in New Issue
Block a user