mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-04-07 10:56:49 +00:00
zfec: finish up some renaming of pyfec to zfec
This commit is contained in:
parent
224a66dbde
commit
0da053eda2
@ -19,7 +19,7 @@ which is a mature and optimized implementation of erasure coding. The zfec
|
||||
package makes several changes from the original "fec" package, including
|
||||
addition of the Python API, refactoring of the C API to support zero-copy
|
||||
operation, a few clean-ups and micro-optimizations of the core code itself,
|
||||
and the addition of a command-line tool named "fec".
|
||||
and the addition of a command-line tool named "zfec".
|
||||
|
||||
|
||||
* Community
|
||||
@ -52,7 +52,7 @@ and k is required to be at least 1 and at most m.
|
||||
degenerates to the equivalent of the Unix "split" utility which simply splits
|
||||
the input into successive segments. Similarly, when k == 1 it degenerates to
|
||||
the equivalent of the unix "cp" utility -- each block is a complete copy of the
|
||||
input data. The "fec" command-line tool does not implement these degenerate
|
||||
input data. The "zfec" command-line tool does not implement these degenerate
|
||||
cases.)
|
||||
|
||||
Note that each "primary block" is a segment of the original data, so its size
|
||||
@ -63,9 +63,9 @@ the same size as all the others). In addition to the data contained in the
|
||||
blocks themselves there are also a few pieces of metadata which are necessary
|
||||
for later reconstruction. Those pieces are: 1. the value of K, 2. the value
|
||||
of M, 3. the sharenum of each block, 4. the number of bytes of padding
|
||||
that were used. The "fec" command-line tool compresses these pieces of data
|
||||
that were used. The "zfec" command-line tool compresses these pieces of data
|
||||
and prepends them to the beginning of each share, so each the sharefile
|
||||
produced by the "fec" command-line tool is between one and four bytes larger
|
||||
produced by the "zfec" command-line tool is between one and four bytes larger
|
||||
than the share data alone.
|
||||
|
||||
The decoding step requires as input k of the blocks which were produced by the
|
||||
@ -75,12 +75,12 @@ input to the encoding step.
|
||||
|
||||
* Command-Line Tool
|
||||
|
||||
The bin/ directory contains two Unix-style, command-line tools "fec" and
|
||||
"unfec". Execute "fec --help" or "unfec --help" for usage instructions.
|
||||
The bin/ directory contains two Unix-style, command-line tools "zfec" and
|
||||
"zunfec". Execute "zfec --help" or "zunfec --help" for usage instructions.
|
||||
|
||||
Note: a Unix-style tool like "fec" does only one thing -- in this case
|
||||
Note: a Unix-style tool like "zfec" does only one thing -- in this case
|
||||
erasure coding -- and leaves other tasks to other tools. Other Unix-style
|
||||
tools that go well with fec include "GNU tar" for archiving multiple files and
|
||||
tools that go well with zfec include "GNU tar" for archiving multiple files and
|
||||
directories into one file, "rzip" for compression, "GNU Privacy Guard" for
|
||||
encryption, and "sha256sum" for integrity. It is important to do things in
|
||||
order: first archive, then compress, then either encrypt or sha256sum, then
|
||||
@ -90,10 +90,10 @@ also ensure integrity, so the use of sha256sum is unnecessary in that case.
|
||||
|
||||
* Performance Measurements
|
||||
|
||||
On my Athlon 64 2.4 GHz workstation (running Linux), the "fec" command-line
|
||||
On my Athlon 64 2.4 GHz workstation (running Linux), the "zfec" command-line
|
||||
tool encoded a 160 MB file with m=100, k=94 (about 6% redundancy) in 3.9
|
||||
seconds, where the "par2" tool encoded the file with about 6% redundancy in
|
||||
27 seconds. "fec" encoded the same file with m=12, k=6 (100% redundancy) in
|
||||
27 seconds. zfec encoded the same file with m=12, k=6 (100% redundancy) in
|
||||
4.1 seconds, where par2 encoded it with about 100% redundancy in 7 minutes
|
||||
and 56 seconds.
|
||||
|
||||
@ -182,8 +182,8 @@ objects (e.g. Python strings) to hold the data that you pass to zfec.
|
||||
* Utilities
|
||||
|
||||
The filefec.py module has a utility function for efficiently reading a file
|
||||
and encoding it piece by piece. This module is used by the "fec" and "unfec"
|
||||
command-line tools from the bin/ directory.
|
||||
and encoding it piece by piece. This module is used by the "zfec" and
|
||||
"zunfec" command-line tools from the bin/ directory.
|
||||
|
||||
|
||||
* Dependencies
|
||||
|
@ -277,7 +277,7 @@ static PyMemberDef Encoder_members[] = {
|
||||
static PyTypeObject Encoder_type = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /*ob_size*/
|
||||
"fec.Encoder", /*tp_name*/
|
||||
"_fec.Encoder", /*tp_name*/
|
||||
sizeof(Encoder), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)Encoder_dealloc, /*tp_dealloc*/
|
||||
@ -541,7 +541,7 @@ static PyMemberDef Decoder_members[] = {
|
||||
static PyTypeObject Decoder_type = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /*ob_size*/
|
||||
"fec.Decoder", /*tp_name*/
|
||||
"_fec.Decoder", /*tp_name*/
|
||||
sizeof(Decoder), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor)Decoder_dealloc, /*tp_dealloc*/
|
||||
|
@ -1,103 +0,0 @@
|
||||
# zfec -- fast forward error correction library with Python interface
|
||||
#
|
||||
# Copyright (C) 2007 Allmydata, Inc.
|
||||
# Author: Zooko Wilcox-O'Hearn
|
||||
# mailto:zooko@zooko.com
|
||||
#
|
||||
# This file is part of zfec.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation; either version 2 of the License, or (at your option) any later
|
||||
# version. This program also comes with the added permission that, in the case
|
||||
# that you are obligated to release a derived work under this licence (as per
|
||||
# section 2.b of the GPL), you may delay the fulfillment of this obligation for
|
||||
# up to 12 months.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
import fec
|
||||
|
||||
import array, random
|
||||
|
||||
def f_easyfec(filesize):
    """Benchmark the easyfec-based file encoder on a file of `filesize` bytes."""
    encoder = fec.filefec.encode_to_files_easyfec
    return bench_encode_to_files_shuffle_decode_from_files(
        filesize, verbose=False, encodefunc=encoder)
|
||||
|
||||
def f_fec_stringy(filesize):
    """Benchmark the stringy file encoder on a file of `filesize` bytes."""
    encoder = fec.filefec.encode_to_files_stringy
    return bench_encode_to_files_shuffle_decode_from_files(
        filesize, verbose=False, encodefunc=encoder)
|
||||
|
||||
def f_fec(filesize):
    """Benchmark the default file encoder on a file of `filesize` bytes."""
    encoder = fec.filefec.encode_to_files
    return bench_encode_to_files_shuffle_decode_from_files(
        filesize, verbose=False, encodefunc=encoder)
|
||||
|
||||
def bench_encode_to_files_shuffle_decode_from_files(filesize=1000000, verbose=False, encodefunc=fec.filefec.encode_to_files):
|
||||
CHUNKSIZE=4096
|
||||
PREFIX="testshare"
|
||||
K=25
|
||||
M=100
|
||||
import os, time
|
||||
left=filesize
|
||||
outfile = open("tmpranddata", "wb")
|
||||
try:
|
||||
while left:
|
||||
d = os.urandom(min(left, CHUNKSIZE))
|
||||
outfile.write(d)
|
||||
left -= len(d)
|
||||
outfile.flush()
|
||||
outfile = None
|
||||
infile = open("tmpranddata", "rb")
|
||||
st = time.time()
|
||||
encodefunc(infile, PREFIX, K, M)
|
||||
so = time.time()
|
||||
if verbose:
|
||||
print "Encoded %s byte file into %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, M, so-st, filesize/((so-st)*filesize),)
|
||||
enctime = so-st
|
||||
# Now delete m-k of the tempfiles at random.
|
||||
tempfs = [ f for f in os.listdir(".") if f.startswith(PREFIX) ]
|
||||
random.shuffle(tempfs)
|
||||
for victimtempf in tempfs[:M-K]:
|
||||
os.remove(victimtempf)
|
||||
recoveredfile = open("tmpranddata-recovered", "wb")
|
||||
st = time.time()
|
||||
fec.filefec.decode_from_files(recoveredfile, filesize, PREFIX, K, M)
|
||||
so = time.time()
|
||||
if verbose:
|
||||
print "Decoded %s byte file from %d share files in %0.2f seconds, or %0.2f million bytes per second" % (filesize, K, so-st, filesize/((so-st)*filesize),)
|
||||
return enctime + (so-st)
|
||||
finally:
|
||||
# os.remove("tmpranddata")
|
||||
pass
|
||||
|
||||
def bench_read_encode_and_drop():
|
||||
FILESIZE=1000000
|
||||
CHUNKSIZE=4096
|
||||
import os, time
|
||||
left=FILESIZE
|
||||
outfile = open("tmpranddata", "wb")
|
||||
try:
|
||||
while left:
|
||||
d = os.urandom(min(left, CHUNKSIZE))
|
||||
outfile.write(d)
|
||||
left -= len(d)
|
||||
outfile.flush()
|
||||
outfile = None
|
||||
infile = open("tmpranddata", "rb")
|
||||
def cb(s, l):
|
||||
pass
|
||||
st = time.time()
|
||||
fec.filefec.encode_file(infile, cb, 25, 100, 4096)
|
||||
so = time.time()
|
||||
print "Encoded %s byte file in %0.2f seconds, or %0.2f million bytes per second" % (FILESIZE, so-st, FILESIZE/((so-st)*1000000),)
|
||||
return so-st
|
||||
finally:
|
||||
os.remove("tmpranddata")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the full encode/shuffle/decode benchmark
    # with its default parameters.
    bench_encode_to_files_shuffle_decode_from_files()
|
||||
|
Loading…
x
Reference in New Issue
Block a user