Mirror of https://github.com/tahoe-lafs/tahoe-lafs.git (synced 2024-12-20 13:33:09 +00:00)
pyfec: add easyfec wrapper which takes a single string, splits it into input shares (padding as needed), then passes them on to the inner fec object
parent f3831d979c
commit 3c5d50797f

New file: pyfec/fec/easyfec.py (+28 lines)
@@ -0,0 +1,28 @@
+import fec
+
+# div_ceil() was copied from the pyutil library.
+def div_ceil(n, d):
+    """
+    The smallest integer k such that k*d >= n.
+    """
+    return (n/d) + (n%d != 0)
+
+
+class Encoder(object):
+    def __init__(self, k, m):
+        self.fec = fec.Encoder(k, m)
+
+    def encode(self, data):
+        """
+        @param data: string
+        """
+        chunksize = div_ceil(len(data), self.fec.k)
+        # zero-pad so that the data splits into exactly k chunks of chunksize bytes
+        data = data + ('\x00' * (chunksize*self.fec.k - len(data)))
+        l = [ data[i*chunksize:(i+1)*chunksize] for i in range(self.fec.k) ]
+        return self.fec.encode(l)
+
+    def decode(self, shares):
+        return self.fec.decode(shares)
+
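For illustration, a minimal usage sketch of the wrapper above (it assumes the compiled fec extension and this easyfec module are importable as fec and fec.easyfec; the 3-of-5 parameters and the sample string are made up). For a 10-byte input with k=3, chunksize = div_ceil(10, 3) = 4, so the string is zero-padded to 12 bytes and split into three 4-byte chunks before being handed to the inner fec encoder:

    from fec import easyfec

    enc = easyfec.Encoder(3, 5)            # any 3 of the 5 shares suffice
    shares = enc.encode("0123456789")      # padded to 12 bytes, split into 3 chunks
    assert len(shares) == 5                # one entry per share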
@@ -23,10 +23,34 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
-import fec
+import easyfec, fec
 
 import array, random
 
+def encode_to_files_easyfec(inf, prefix, k, m):
+    """
+    Encode inf, writing the shares to files named $prefix+$shareid.
+    """
+    l = [ open(prefix+str(shareid), "wb") for shareid in range(m) ]
+    def cb(shares, length):
+        assert len(shares) == len(l)
+        for i in range(len(shares)):
+            l[i].write(shares[i])
+
+    encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096)
+
+def encode_to_files_stringy(inf, prefix, k, m):
+    """
+    Encode inf, writing the shares to files named $prefix+$shareid.
+    """
+    l = [ open(prefix+str(shareid), "wb") for shareid in range(m) ]
+    def cb(shares, length):
+        assert len(shares) == len(l)
+        for i in range(len(shares)):
+            l[i].write(shares[i])
+
+    encode_file_stringy(inf, cb, k, m, chunksize=4096)
+
 def encode_to_files(inf, prefix, k, m):
     """
     Encode inf, writing the shares to files named $prefix+$shareid.
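A sketch of calling the new encode_to_files_easyfec() helper directly (the input file name and the 25-of-100 parameters are illustrative; it assumes this module is importable as fec.filefec, as the benchmark below does):

    from fec import filefec

    # writes 100 share files named share0 .. share99; any 25 of them are
    # enough to reconstruct the (zero-padded) contents of input.bin
    inf = open("input.bin", "rb")
    try:
        filefec.encode_to_files_easyfec(inf, "share", 25, 100)
    finally:
        inf.close()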
@@ -214,3 +238,34 @@ def encode_file_not_really(inf, cb, k, m, chunksize=4096):
     # res = enc.encode(l)
     # print "...finished to encode()"
     cb(l, indatasize)
+
+def encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096):
+    """
+    Read in the contents of inf, encode, and call cb with the results.
+
+    First, chunksize*k bytes are read from inf and encoded into m "result
+    shares".  Then cb is invoked with a list of the m result shares as its
+    first argument and the length of the encoded data as its second argument.
+    (The length of the encoded data is equal to k*chunksize on every
+    iteration except the last, when the end of the file has been reached and
+    fewer than k*chunksize bytes could be read.)  This procedure is repeated
+    until the end of the file is reached; on the final iteration the unused
+    portion of the input is padded with zeroes before encoding.
+
+    @param inf the file object from which to read the data
+    @param cb the callback to be invoked with the results
+    @param k the number of shares required to reconstruct the file
+    @param m the total number of shares created
+    @param chunksize how much data to read from inf for each of the k input
+        shares
+    """
+    enc = easyfec.Encoder(k, m)
+
+    indatasize = k*chunksize # shorter on the final read, at EOF
+    indata = inf.read(indatasize)
+    while indata:
+        indatasize = len(indata)
+        res = enc.encode(indata)
+        cb(res, indatasize)
+        indata = inf.read(indatasize)
+
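The chunked callback protocol described in the docstring can also be driven directly; here is a sketch with a cb that just records how many share bytes each block produces (the StringIO input, the parameters, and the tally list are illustrative):

    from StringIO import StringIO
    from fec import filefec

    tallies = []
    def cb(shares, length):
        # invoked once per k*chunksize-byte block read from the input
        tallies.append(sum([len(s) for s in shares]))

    filefec.encode_file_stringy_easyfec(StringIO("x" * 10000), cb, 3, 5, chunksize=1024)
    print tallies  # one entry per 3*1024-byte block (the last block is shorter)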
@@ -27,14 +27,22 @@ import fec
 
 import array, random
 
-def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
-    FILESIZE=1000000
+def f_easyfec(filesize):
+    return bench_encode_to_files_shuffle_decode_from_files(filesize, verbose=False, encodefunc=fec.filefec.encode_to_files_easyfec)
+
+def f_fec_stringy(filesize):
+    return bench_encode_to_files_shuffle_decode_from_files(filesize, verbose=False, encodefunc=fec.filefec.encode_to_files_stringy)
+
+def f_fec(filesize):
+    return bench_encode_to_files_shuffle_decode_from_files(filesize, verbose=False, encodefunc=fec.filefec.encode_to_files)
+
+def bench_encode_to_files_shuffle_decode_from_files(filesize=1000000, verbose=False, encodefunc=fec.filefec.encode_to_files):
     CHUNKSIZE=4096
     PREFIX="testshare"
     K=25
     M=100
     import os, time
-    left=FILESIZE
+    left=filesize
     outfile = open("tmpranddata", "wb")
     try:
         while left:
@@ -45,10 +53,10 @@ def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
         outfile = None
         infile = open("tmpranddata", "rb")
         st = time.time()
-        fec.filefec.encode_to_files(infile, PREFIX, K, M)
+        encodefunc(infile, PREFIX, K, M)
         so = time.time()
         if verbose:
-            print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, M, so-st, FILESIZE/((so-st)*1000000),)
+            print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, M, so-st, filesize/((so-st)*1000000),)
         enctime = so-st
         # Now delete m-k of the tempfiles at random.
         tempfs = [ f for f in os.listdir(".") if f.startswith(PREFIX) ]
@@ -57,10 +65,10 @@ def bench_encode_to_files_shuffle_decode_from_files(verbose=False):
             os.remove(victimtempf)
         recoveredfile = open("tmpranddata-recovered", "wb")
         st = time.time()
-        fec.filefec.decode_from_files(recoveredfile, 1000000, PREFIX, K, M)
+        fec.filefec.decode_from_files(recoveredfile, filesize, PREFIX, K, M)
         so = time.time()
         if verbose:
-            print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, K, so-st, FILESIZE/((so-st)*1000000),)
+            print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, K, so-st, filesize/((so-st)*1000000),)
         return enctime + (so-st)
     finally:
         # os.remove("tmpranddata")
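With the helpers above in scope, the three encoders can be compared side by side; a sketch (the 1,000,000-byte size mirrors the old FILESIZE default):

    for name, f in [("easyfec", f_easyfec),
                    ("fec stringy", f_fec_stringy),
                    ("fec", f_fec)]:
        print "%s: %0.2f seconds to encode, shuffle, and decode" % (name, f(1000000))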