2007-01-26 01:02:16 +00:00
|
|
|
# pyfec -- fast forward error correction library with Python interface
|
|
|
|
#
|
|
|
|
# Copyright (C) 2007 Allmydata, Inc.
|
|
|
|
# Author: Zooko Wilcox-O'Hearn
|
|
|
|
# mailto:zooko@zooko.com
|
|
|
|
#
|
|
|
|
# This file is part of pyfec.
|
|
|
|
#
|
2007-03-28 03:14:06 +00:00
|
|
|
# This program is free software; you can redistribute it and/or modify it
|
|
|
|
# under the terms of the GNU General Public License as published by the Free
|
|
|
|
# Software Foundation; either version 2 of the License, or (at your option)
|
|
|
|
# any later version. This program also comes with the added permission that,
|
|
|
|
# in the case that you are obligated to release a derived work under this
|
|
|
|
# licence (as per section 2.b of the GPL), you may delay the fulfillment of
|
|
|
|
# this obligation for up to 12 months.
|
2007-01-26 01:02:16 +00:00
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
2007-03-28 03:14:30 +00:00
|
|
|
import easyfec, fec
|
2007-01-25 22:25:19 +00:00
|
|
|
|
2007-01-30 17:37:35 +00:00
|
|
|
import array, random
|
2007-01-25 22:25:19 +00:00
|
|
|
|
2007-03-28 03:14:30 +00:00
|
|
|
def encode_to_files_easyfec(inf, prefix, k, m):
    """
    Encode inf, writing each share to a file named $prefix+$sharenum.

    The encoding itself is done by encode_file_stringy_easyfec() with a
    chunksize of 4096.

    @param inf the file object from which to read the data
    @param prefix the string to which each share number (0 .. m-1) is
        appended to form the name of that share's output file
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created
    """
    outfs = []
    try:
        # Open the files one at a time inside the try block so that the ones
        # already opened still get closed if a later open() fails.
        for sharenum in range(m):
            outfs.append(open(prefix+str(sharenum), "wb"))

        def cb(blocks, length):
            # Block number i belongs to share number i; length is not needed
            # here because the blocks are written out in full.
            assert len(blocks) == len(outfs)
            for outf, block in zip(outfs, blocks):
                outf.write(block)

        encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096)
    finally:
        # Close (and thereby flush) every share file, even on error.
        for outf in outfs:
            outf.close()
|
|
|
|
|
|
|
|
def encode_to_files_stringy(inf, prefix, k, m):
    """
    Encode inf, writing each share to a file named $prefix+$sharenum.

    The encoding itself is done by encode_file_stringy() with a chunksize of
    4096.

    @param inf the file object from which to read the data
    @param prefix the string to which each share number (0 .. m-1) is
        appended to form the name of that share's output file
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created
    """
    outfs = []
    try:
        # Open the files one at a time inside the try block so that the ones
        # already opened still get closed if a later open() fails.
        for sharenum in range(m):
            outfs.append(open(prefix+str(sharenum), "wb"))

        def cb(blocks, length):
            # Block number i belongs to share number i; length is not needed
            # here because the blocks are written out in full.
            assert len(blocks) == len(outfs)
            for outf, block in zip(outfs, blocks):
                outf.write(block)

        encode_file_stringy(inf, cb, k, m, chunksize=4096)
    finally:
        # Close (and thereby flush) every share file, even on error.
        for outf in outfs:
            outf.close()
|
|
|
|
|
2007-01-27 03:15:27 +00:00
|
|
|
def encode_to_files(inf, prefix, k, m):
    """
    Encode inf, writing each share to a file named $prefix+$sharenum.

    The encoding itself is done by encode_file() with a chunksize of 4096.

    @param inf the file object from which to read the data
    @param prefix the string to which each share number (0 .. m-1) is
        appended to form the name of that share's output file
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created
    """
    outfs = []
    try:
        # Open the files one at a time inside the try block so that the ones
        # already opened still get closed if a later open() fails.
        for sharenum in range(m):
            outfs.append(open(prefix+str(sharenum), "wb"))

        def cb(blocks, length):
            # Block number i belongs to share number i; length is not needed
            # here because the blocks are written out in full.
            assert len(blocks) == len(outfs)
            for outf, block in zip(outfs, blocks):
                outf.write(block)

        encode_file(inf, cb, k, m, chunksize=4096)
    finally:
        # Close (and thereby flush) every share file, even on error.
        for outf in outfs:
            outf.close()
|
|
|
|
|
2007-01-30 17:37:35 +00:00
|
|
|
def decode_from_files(outf, filesize, prefix, k, m):
    """
    Decode from k files in the current directory whose names begin with
    prefix, writing the results to outf.

    The directory listing is shuffled before it is scanned, so which k of the
    matching files get used is random.  The part of each file name after the
    prefix is parsed as that file's share number.

    @param outf the file object to which the decoded data is written
    @param filesize the number of bytes of decoded data to write; decoded
        output beyond this length (the padding of the last segment) is
        discarded
    @param prefix the string with which the names of the share files begin
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created

    @raise ValueError if a file name has a non-integer suffix after the
        prefix, or if decoding yields an empty block (probably because a
        share file was truncated)
    """
    import os
    infs = []
    sharenums = []
    try:
        listd = os.listdir(".")
        random.shuffle(listd)
        for f in listd:
            if f.startswith(prefix):
                # Parse the share number first so that a malformed name does
                # not leave a freshly opened file behind.
                sharenum = int(f[len(prefix):])
                infs.append(open(f, "rb"))
                sharenums.append(sharenum)
                if len(infs) == k:
                    break

        CHUNKSIZE = 4096
        dec = fec.Decoder(k, m)
        # Stop as soon as all of the requested bytes have been written, so
        # that we never try to decode past the end of the share files.
        while filesize > 0:
            x = [ inf.read(CHUNKSIZE) for inf in infs ]
            decblocks = dec.decode(x, sharenums)
            for decblock in decblocks:
                if len(decblock) == 0:
                    raise ValueError("error -- probably share was too short -- was it stored in a file which got truncated? chunksizes: %s" % ([len(chunk) for chunk in x],))
                if filesize >= len(decblock):
                    outf.write(decblock)
                    filesize -= len(decblock)
                else:
                    # Last block: the remainder of it is padding.
                    outf.write(decblock[:filesize])
                    return
    finally:
        # Close every share file that was opened, even on error.
        for sharefile in infs:
            sharefile.close()
|
2007-01-27 03:15:27 +00:00
|
|
|
|
2007-01-25 22:25:19 +00:00
|
|
|
def encode_file(inf, cb, k, m, chunksize=4096):
    """
    Read in the contents of inf, encode, and call cb with the results.

    First, k "input blocks" will be read from inf, each input block being of
    size chunksize.  Then these k blocks will be encoded into m "result
    blocks".  Then cb will be invoked, passing a list of the m result blocks
    as its first argument, and the length of the encoded data as its second
    argument.  (The length of the encoded data is always equal to k*chunksize,
    until the last iteration, when the end of the file has been reached and
    less than k*chunksize bytes could be read from the file.)  This procedure
    is iterated until the end of the file is reached, in which case the space
    of the input blocks that is unused is filled with zeroes before encoding.

    The final call to cb always reports a length of less than k*chunksize; if
    the size of the input is an exact multiple of k*chunksize (including an
    empty input), that final call reports a length of 0 for a segment
    consisting entirely of padding.

    Note that the sequence passed in calls to cb() contains mutable array
    objects in its first k elements whose contents will be overwritten when
    the next segment is read from the input file.  Therefore the
    implementation of cb() has to either be finished with those first k arrays
    before returning, or if it wants to keep the contents of those arrays for
    subsequent use after it has returned then it must make a copy of them to
    keep.

    @param inf the file object from which to read the data
    @param cb the callback to be invoked with the results
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created
    @param chunksize how much data to read from inf for each of the k input
        blocks
    """
    enc = fec.Encoder(k, m)
    l = tuple([ array.array('c') for i in range(k) ])
    indatasize = k*chunksize # will be reset to shorter upon EOF
    ZEROES=array.array('c', ['\x00'])*chunksize
    while indatasize == k*chunksize:
        # This loop body executes once per segment.
        i = 0
        while (i<len(l)):
            # This loop body executes once per chunk.
            a = l[i]
            del a[:]
            try:
                a.fromfile(inf, chunksize)
                i += 1
            except EOFError:
                # Here i still counts only the chunks that were read in full,
                # and a holds whatever could be read of the current chunk.
                indatasize = i*chunksize + len(a)

                # padding
                a.fromstring("\x00" * (chunksize-len(a)))
                i += 1
                while (i<len(l)):
                    a = l[i]
                    a[:] = ZEROES
                    i += 1

        res = enc.encode(l)
        cb(res, indatasize)
|
|
|
|
|
|
|
|
def encode_file_stringy(inf, cb, k, m, chunksize=4096):
    """
    Read in the contents of inf, encode, and call cb with the results.

    First, k "input blocks" will be read from inf, each input block being of
    size chunksize.  Then these k blocks will be encoded into m "result
    blocks".  Then cb will be invoked, passing a list of the m result blocks
    as its first argument, and the length of the encoded data as its second
    argument.  (The length of the encoded data is always equal to k*chunksize,
    until the last iteration, when the end of the file has been reached and
    less than k*chunksize bytes could be read from the file.)  This procedure
    is iterated until the end of the file is reached, in which case the part
    of the input blocks that is unused is filled with zeroes before encoding.

    The final call to cb always reports a length of less than k*chunksize; if
    the size of the input is an exact multiple of k*chunksize (including an
    empty input), that final call reports a length of 0 for a segment
    consisting entirely of padding.

    @param inf the file object from which to read the data
    @param cb the callback to be invoked with the results
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created
    @param chunksize how much data to read from inf for each of the k input
        blocks
    """
    enc = fec.Encoder(k, m)
    segmentsize = k*chunksize
    ZEROES = '\x00'*chunksize
    indatasize = segmentsize # will be reset to shorter upon EOF
    while indatasize == segmentsize:
        # This loop body executes once per segment.
        l = []
        while len(l) < k:
            # This loop body executes once per chunk.
            chunk = inf.read(chunksize)
            if len(chunk) < chunksize:
                # A short read is taken to mean end of file.  len(l) counts
                # only the chunks that were read in full.
                indatasize = len(l)*chunksize + len(chunk)

                # padding
                l.append(chunk + "\x00" * (chunksize-len(chunk)))
                while len(l) < k:
                    l.append(ZEROES)
            else:
                l.append(chunk)

        res = enc.encode(l)
        cb(res, indatasize)
|
|
|
|
|
2007-01-26 00:50:26 +00:00
|
|
|
def encode_file_not_really(inf, cb, k, m, chunksize=4096):
    """
    Read in the contents of inf, and call cb with the results.

    This goes through the same reading and zero-padding steps as
    encode_file(), but skips the encoding: cb is passed the tuple of the k
    (padded) input arrays as its first argument and the length of the data
    that was read for this segment as its second argument.

    The arrays passed to cb are reused and overwritten when the next segment
    is read, so cb must copy anything it wants to keep.

    @param inf the file object from which to read the data
    @param cb the callback to be invoked with the results
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created
    @param chunksize how much data to read from inf for each of the k input
        blocks
    """
    # The encoder is constructed but never used.
    # NOTE(review): presumably kept so that this function has the same setup
    # cost and argument checking as encode_file() -- confirm before removing.
    enc = fec.Encoder(k, m)
    l = tuple([ array.array('c') for i in range(k) ])
    indatasize = k*chunksize # will be reset to shorter upon EOF
    ZEROES=array.array('c', ['\x00'])*chunksize
    while indatasize == k*chunksize:
        # This loop body executes once per segment.
        i = 0
        while (i<len(l)):
            # This loop body executes once per chunk.
            a = l[i]
            del a[:]
            try:
                a.fromfile(inf, chunksize)
                i += 1
            except EOFError:
                # Here i still counts only the chunks that were read in full,
                # and a holds whatever could be read of the current chunk.
                indatasize = i*chunksize + len(a)

                # padding
                a.fromstring("\x00" * (chunksize-len(a)))
                i += 1
                while (i<len(l)):
                    # Zero-fill each of the remaining arrays (not the
                    # partially filled one that was just padded).
                    a = l[i]
                    a[:] = ZEROES
                    i += 1

        cb(l, indatasize)
|
2007-03-28 03:14:30 +00:00
|
|
|
|
|
|
|
def encode_file_stringy_easyfec(inf, cb, k, m, chunksize=4096):
    """
    Read in the contents of inf, encode, and call cb with the results.

    First, chunksize*k bytes will be read from inf, then encoded into m
    "result blocks".  Then cb will be invoked, passing a list of the m result
    blocks as its first argument, and the length of the encoded data as its
    second argument.  (The length of the encoded data is always equal to
    k*chunksize, until the last iteration, when the end of the file has been
    reached and less than k*chunksize bytes could be read from the file.)
    This procedure is iterated until the end of the file is reached.  A short
    final segment is handed to the easyfec encoder as it is; this function
    does no padding of its own.

    cb is not invoked at all if inf yields no data.

    @param inf the file object from which to read the data
    @param cb the callback to be invoked with the results
    @param k the number of shares required to reconstruct the file
    @param m the total number of shares created
    @param chunksize how much data to read from inf for each of the k input
        blocks
    """
    enc = easyfec.Encoder(k, m)

    readsize = k*chunksize
    indata = inf.read(readsize)
    while indata:
        res = enc.encode(indata)
        # Report how much data this segment really holds, which is less than
        # readsize for a short final segment.
        cb(res, len(indata))
        indata = inf.read(readsize)
|
|
|
|
|