use pyfec instead of py_ecc for erasure coding and update API to codec

Zooko O'Whielacronx 2007-02-01 16:07:00 -07:00
parent 1373789463
commit dd4ad3d542
12 changed files with 151 additions and 2377 deletions

View File

@@ -11,10 +11,8 @@ setup(
       version="0.0.1",
       packages=["allmydata", "allmydata.test", "allmydata.util",
                 "allmydata.filetree", "allmydata.scripts",
-                "allmydata.py_ecc",
                 ],
       package_dir={ "allmydata": "src/allmydata",
-                    "allmydata.py_ecc": "src/py_ecc",
                    },
       scripts = ["bin/allmydata-tahoe"],
       package_data={ 'allmydata': ['web/*.xhtml'] },

View File

@@ -4,12 +4,25 @@ from zope.interface import implements
 from twisted.internet import defer
 import sha
 from allmydata.util import idlib, mathutil
+from allmydata.util.assertutil import _assert, precondition
 from allmydata.interfaces import ICodecEncoder, ICodecDecoder
-from allmydata.py_ecc import rs_code
+import fec

 def netstring(s):
     return "%d:%s," % (len(s), s)

+from base64 import b32encode
+def ab(x): # debuggery
+    if len(x) >= 3:
+        return "%s:%s" % (len(x), b32encode(x[-3:]),)
+    elif len(x) == 2:
+        return "%s:%s" % (len(x), b32encode(x[-2:]),)
+    elif len(x) == 1:
+        return "%s:%s" % (len(x), b32encode(x[-1:]),)
+    elif len(x) == 0:
+        return "%s:%s" % (len(x), "--empty--",)
+
 class ReplicatingEncoder(object):
     implements(ICodecEncoder)
     ENCODER_TYPE = "rep"
@@ -28,12 +41,11 @@ class ReplicatingEncoder(object):
     def get_share_size(self):
         return self.data_size

-    def encode(self, data, num_shares=None):
-        if num_shares is None:
-            num_shares = self.max_shares
-        assert num_shares <= self.max_shares
-        shares = [(i,data) for i in range(num_shares)]
-        return defer.succeed(shares)
+    def encode(self, data, desired_shareids=None):
+        if desired_shareids is None:
+            desired_shareids = range(self.max_shares)
+        shares = [data for i in desired_shareids]
+        return defer.succeed((shares, desired_shareids))

 class ReplicatingDecoder(object):
     implements(ICodecDecoder)
@@ -44,10 +56,10 @@ class ReplicatingDecoder(object):
     def get_required_shares(self):
         return self.required_shares

-    def decode(self, some_shares):
-        assert len(some_shares) >= self.required_shares
-        data = some_shares[0][1]
-        return defer.succeed(data)
+    def decode(self, some_shares, their_shareids):
+        assert len(some_shares) == self.required_shares
+        assert len(some_shares) == len(their_shareids)
+        return defer.succeed(some_shares[0])

 class Encoder(object):
@@ -68,7 +80,7 @@ class Encoder(object):
 class Decoder(object):
     def __init__(self, outfile, k, m, verifierid):
         self.outfile = outfile
-        self.k = 2
+        self.k = k
         self.m = m
         self._verifierid = verifierid
@@ -94,107 +106,57 @@ class Decoder(object):
         assert self._verifierid == vid, "%s != %s" % (idlib.b2a(self._verifierid), idlib.b2a(vid))

-class PyRSEncoder(object):
+class CRSEncoder(object):
     implements(ICodecEncoder)
-    ENCODER_TYPE = "pyrs"
-
-    # we will break the data into vectors in which each element is a single
-    # byte (i.e. a single number from 0 to 255), and the length of the vector
-    # is equal to the number of required_shares. We use padding to make the
-    # last chunk of data long enough to match, and we record the data_size in
-    # the serialized parameters to strip this padding out on the receiving
-    # end.
-
-    # TODO: this will write a 733kB file called 'ffield.lut.8' in the current
-    # directory the first time it is run, to cache the lookup table for later
-    # use. It appears to take about 15 seconds to create this the first time,
-    # and about 0.5s to load it in each time afterwards. Make sure this file
-    # winds up somewhere reasonable.
-
-    # TODO: the encoder/decoder RSCode object depends upon the number of
-    # required/total shares, but not upon the data. We could probably save a
-    # lot of initialization time by caching a single instance and using it
-    # any time we use the same required/total share numbers (which will
-    # probably be always).
-
-    # on my workstation (fluxx, a 3.5GHz Athlon), this encodes data at a rate
-    # of 6.7kBps. Zooko's mom's 1.8GHz G5 got 2.2kBps . slave3 took 40s to
-    # construct the LUT and encodes at 1.5kBps, and for some reason took more
-    # than 20 minutes to run the test_encode_share tests, so I disabled most
-    # of them. (uh, hello, it's running figleaf)
+    ENCODER_TYPE = 2

     def set_params(self, data_size, required_shares, max_shares):
         assert required_shares <= max_shares
         self.data_size = data_size
         self.required_shares = required_shares
         self.max_shares = max_shares
-        self.chunk_size = required_shares
-        self.num_chunks = mathutil.div_ceil(data_size, self.chunk_size)
-        self.last_chunk_padding = mathutil.pad_size(data_size, required_shares)
-        self.share_size = self.num_chunks
-        self.encoder = rs_code.RSCode(max_shares, required_shares, 8)
+        self.share_size = mathutil.div_ceil(data_size, required_shares)
+        self.last_share_padding = mathutil.pad_size(self.share_size, required_shares)
+        self.encoder = fec.Encoder(required_shares, max_shares)

     def get_encoder_type(self):
         return self.ENCODER_TYPE

     def get_serialized_params(self):
-        return "%d-%d-%d" % (self.data_size, self.required_shares,
+        return "%d:%d:%d" % (self.data_size, self.required_shares,
                              self.max_shares)

     def get_share_size(self):
         return self.share_size

-    def encode(self, data, num_shares=None):
-        if num_shares is None:
-            num_shares = self.max_shares
-        assert num_shares <= self.max_shares
-        # we create self.max_shares shares, then throw out any extra ones
-        # so that we always return exactly num_shares shares.
-        share_data = [ [] for i in range(self.max_shares)]
-        for i in range(self.num_chunks):
-            # we take self.chunk_size bytes from the input string, and
-            # turn it into self.max_shares bytes.
-            offset = i*self.chunk_size
-            # Note string slices aren't an efficient way to use memory, so
-            # when we upgrade from the unusably slow py_ecc prototype to a
-            # fast ECC we should also fix up this memory usage (by using the
-            # array module).
-            chunk = data[offset:offset+self.chunk_size]
-            if i == self.num_chunks-1:
-                chunk = chunk + "\x00"*self.last_chunk_padding
-            assert len(chunk) == self.chunk_size
-            input_vector = [ord(x) for x in chunk]
-            assert len(input_vector) == self.required_shares
-            output_vector = self.encoder.Encode(input_vector)
-            assert len(output_vector) == self.max_shares
-            for i2,out in enumerate(output_vector):
-                share_data[i2].append(chr(out))
-        shares = [ (i, "".join(share_data[i]))
-                   for i in range(num_shares) ]
-        return defer.succeed(shares)
+    def encode(self, inshares, desired_share_ids=None):
+        precondition(desired_share_ids is None or len(desired_share_ids) <= self.max_shares, desired_share_ids, self.max_shares)
+
+        if desired_share_ids is None:
+            desired_share_ids = range(self.max_shares)
+
+        for inshare in inshares:
+            assert len(inshare) == self.share_size, (len(inshare), self.share_size, self.data_size, self.required_shares)
+        shares = self.encoder.encode(inshares, desired_share_ids)
+
+        return defer.succeed((shares, desired_share_ids))

-class PyRSDecoder(object):
+class CRSDecoder(object):
     implements(ICodecDecoder)

     def set_serialized_params(self, params):
-        pieces = params.split("-")
+        pieces = params.split(":")
         self.data_size = int(pieces[0])
         self.required_shares = int(pieces[1])
         self.max_shares = int(pieces[2])

         self.chunk_size = self.required_shares
         self.num_chunks = mathutil.div_ceil(self.data_size, self.chunk_size)
-        self.last_chunk_padding = mathutil.pad_size(self.data_size,
-                                                    self.required_shares)
         self.share_size = self.num_chunks
-        self.encoder = rs_code.RSCode(self.max_shares, self.required_shares,
-                                      8)
+        self.decoder = fec.Decoder(self.required_shares, self.max_shares)
         if False:
             print "chunk_size: %d" % self.chunk_size
             print "num_chunks: %d" % self.num_chunks
-            print "last_chunk_padding: %d" % self.last_chunk_padding
             print "share_size: %d" % self.share_size
             print "max_shares: %d" % self.max_shares
             print "required_shares: %d" % self.required_shares
@@ -202,37 +164,15 @@ class PyRSDecoder(object):
     def get_required_shares(self):
         return self.required_shares

-    def decode(self, some_shares):
-        chunk_size = self.chunk_size
-        assert len(some_shares) >= self.required_shares
-        chunks = []
-        have_shares = {}
-        for share_num, share_data in some_shares:
-            have_shares[share_num] = share_data
-        for i in range(self.share_size):
-            # this takes one byte from each share, and turns the combination
-            # into a single chunk
-            received_vector = []
-            for j in range(self.max_shares):
-                share = have_shares.get(j)
-                if share is not None:
-                    received_vector.append(ord(share[i]))
-                else:
-                    received_vector.append(None)
-            decoded_vector = self.encoder.DecodeImmediate(received_vector)
-            assert len(decoded_vector) == self.chunk_size
-            chunk = "".join([chr(x) for x in decoded_vector])
-            chunks.append(chunk)
-        data = "".join(chunks)
-        if self.last_chunk_padding:
-            data = data[:-self.last_chunk_padding]
-        assert len(data) == self.data_size
-        return defer.succeed(data)
+    def decode(self, some_shares, their_shareids):
+        precondition(len(some_shares) == len(their_shareids), len(some_shares), len(their_shareids))
+        precondition(len(some_shares) == self.required_shares, len(some_shares), self.required_shares)
+        return defer.succeed(self.decoder.decode(some_shares, their_shareids))

 all_encoders = {
     ReplicatingEncoder.ENCODER_TYPE: (ReplicatingEncoder, ReplicatingDecoder),
-    PyRSEncoder.ENCODER_TYPE: (PyRSEncoder, PyRSDecoder),
+    CRSEncoder.ENCODER_TYPE: (CRSEncoder, CRSDecoder),
 }

 def get_decoder_by_name(name):
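For reference, a minimal round trip through the new codec API (a sketch, not part of this commit; it assumes pyfec is installed so 'import fec' works, and it mirrors the calling convention exercised in test_encode_share below):

import os
from allmydata.codec import CRSEncoder, CRSDecoder
from allmydata.util import mathutil

data_size, k, m = 1000, 25, 100
share_size = mathutil.div_ceil(data_size, k)
# encode() now takes k equal-length input buffers, not one big string
inshares = [os.urandom(share_size) for i in range(k)]

enc = CRSEncoder()
enc.set_params(data_size, k, m)
d = enc.encode(inshares)   # fires with (shares, shareids)

def _decode((shares, shareids)):
    dec = CRSDecoder()
    dec.set_serialized_params(enc.get_serialized_params())
    # any k of the m shares suffice, as long as each id stays with its share
    return dec.decode(shares[:k], shareids[:k])
d.addCallback(_decode)

def _check(pieces):
    assert tuple(pieces) == tuple(inshares)
d.addCallback(_check)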

View File

@@ -5,7 +5,7 @@ from allmydata.chunk import HashTree, roundup_pow2
 from allmydata.Crypto.Cipher import AES
 import sha
 from allmydata.util import mathutil
-from allmydata.codec import PyRSEncoder
+from allmydata.codec import CRSEncoder

 def hash(data):
     return sha.new(data).digest()
@@ -90,15 +90,13 @@ class Encoder(object):
         self.num_segments = mathutil.div_ceil(self.file_size, self.segment_size)

     def setup_encoder(self):
-        self.encoder = PyRSEncoder()
+        self.encoder = CRSEncoder()
         self.encoder.set_params(self.segment_size, self.required_shares,
                                 self.num_shares)
-        self.share_size = self.encoder.get_share_size()

     def get_reservation_size(self):
         self.num_shares = 100
-        self.share_size = self.file_size / self.required_shares
+        self.share_size = mathutil.div_ceil(self.file_size, self.required_shares)
         overhead = self.compute_overhead()
         return self.share_size + overhead
@@ -126,52 +124,57 @@ class Encoder(object):
         return d

     def do_segment(self, segnum):
-        segment_plaintext = self.infile.read(self.segment_size)
-        segment_crypttext = self.cryptor.encrypt(segment_plaintext)
-        del segment_plaintext
-        assert self.encoder.max_shares == self.num_shares
-        d = self.encoder.encode(segment_crypttext)
+        chunks = []
+        subsharesize = self.encoder.get_share_size()
+        for i in range(self.required_shares):
+            d = self.infile.read(subsharesize)
+            if len(d) < subsharesize:
+                # padding
+                d += ('\x00' * (subsharesize - len(d)))
+            d = self.cryptor.encrypt(d)
+            chunks.append(d)
+        d = self.encoder.encode(chunks)
         d.addCallback(self._encoded_segment)
         return d

-    def _encoded_segment(self, subshare_tuples):
+    def _encoded_segment(self, (shares, shareids)):
         dl = []
-        for share_num,subshare in subshare_tuples:
-            d = self.send_subshare(share_num, self.segment_num, subshare)
+        for shareid,subshare in zip(shareids, shares):
+            d = self.send_subshare(shareid, self.segment_num, subshare)
             dl.append(d)
-            self.subshare_hashes[share_num].append(hash(subshare))
+            self.subshare_hashes[shareid].append(hash(subshare))
         self.segment_num += 1
         return defer.DeferredList(dl)

-    def send_subshare(self, share_num, segment_num, subshare):
+    def send_subshare(self, shareid, segment_num, subshare):
         #if False:
         #    offset = hash_size + segment_num * segment_size
-        #    return self.send(share_num, "write", subshare, offset)
-        return self.send(share_num, "put_subshare", segment_num, subshare)
+        #    return self.send(shareid, "write", subshare, offset)
+        return self.send(shareid, "put_subshare", segment_num, subshare)

-    def send(self, share_num, methname, *args, **kwargs):
-        ll = self.landlords[share_num]
+    def send(self, shareid, methname, *args, **kwargs):
+        ll = self.landlords[shareid]
         return ll.callRemote(methname, *args, **kwargs)

     def send_all_subshare_hash_trees(self):
         dl = []
-        for share_num,hashes in enumerate(self.subshare_hashes):
+        for shareid,hashes in enumerate(self.subshare_hashes):
             # hashes is a list of the hashes of all subshares that were sent
-            # to shareholder[share_num].
-            dl.append(self.send_one_subshare_hash_tree(share_num, hashes))
+            # to shareholder[shareid].
+            dl.append(self.send_one_subshare_hash_tree(shareid, hashes))
         return defer.DeferredList(dl)

-    def send_one_subshare_hash_tree(self, share_num, subshare_hashes):
+    def send_one_subshare_hash_tree(self, shareid, subshare_hashes):
         t = HashTree(subshare_hashes)
         all_hashes = list(t)
         # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
         # all_hashes[1] is the left child, == hash(ah[3]+ah[4])
         # all_hashes[n] == hash(all_hashes[2*n+1] + all_hashes[2*n+2])
-        self.share_root_hashes[share_num] = t[0]
+        self.share_root_hashes[shareid] = t[0]
         if False:
             block = "".join(all_hashes)
-            return self.send(share_num, "write", block, offset=0)
-        return self.send(share_num, "put_subshare_hashes", all_hashes)
+            return self.send(shareid, "write", block, offset=0)
+        return self.send(shareid, "put_subshare_hashes", all_hashes)

     def send_all_share_hash_trees(self):
         dl = []
@@ -192,13 +195,13 @@ class Encoder(object):
             dl.append(self.send_one_share_hash_tree(i, hashes))
         return defer.DeferredList(dl)

-    def send_one_share_hash_tree(self, share_num, needed_hashes):
-        return self.send(share_num, "put_share_hashes", needed_hashes)
+    def send_one_share_hash_tree(self, shareid, needed_hashes):
+        return self.send(shareid, "put_share_hashes", needed_hashes)

     def close_all_shareholders(self):
         dl = []
-        for share_num in range(self.num_shares):
-            dl.append(self.send(share_num, "close"))
+        for shareid in range(self.num_shares):
+            dl.append(self.send(shareid, "close"))
         return defer.DeferredList(dl)

     def done(self):
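The padding arithmetic above leans on two small helpers from allmydata.util.mathutil. A hedged sketch of their assumed semantics (the bodies here are illustrative, not copied from mathutil):

def div_ceil(n, d):
    # smallest integer >= n/d
    return (n + d - 1) / d

def pad_size(n, k):
    # number of bytes needed to round n up to a multiple of k
    return (k - (n % k)) % k

assert div_ceil(1000, 25) == 40   # share_size for data_size=1000, k=25
assert pad_size(123, 25) == 2     # zero bytes appended to the last chunk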

View File

@@ -112,48 +112,35 @@ class ICodecEncoder(Interface):
         """

     def encode(inshares, desired_share_ids=None):
-        """Encode a chunk of data. This may be called multiple times. Each
-        call is independent.
+        """Encode some data. This may be called multiple times. Each call is
+        independent.

-        The data is required to be a string with a length that exactly
-        matches the data_size promised by set_params().
+        inshares is a sequence of length required_shares, containing buffers,
+        where each buffer contains the next contiguous non-overlapping
+        segment of the input data. Each buffer is required to be the same
+        length, and the sum of the lengths of the buffers is required to be
+        exactly the data_size promised by set_params(). (This implies that
+        the data has to be padded before being passed to encode(), unless of
+        course it already happens to be an even multiple of required_shares in
+        length.)

-        'num_shares', if provided, is required to be equal or less than the
-        'max_shares' set in set_params. If 'num_shares' is left at None,
-        this method will produce 'max_shares' shares. This can be used to
-        minimize the work that the encoder needs to do if we initially
-        thought that we would need, say, 100 shares, but now that it is time
-        to actually encode the data we only have 75 peers to send data to.
+        'desired_share_ids', if provided, is required to be a sequence of ints,
+        each of which is required to be >= 0 and < max_shares.

         For each call, encode() will return a Deferred that fires with two
-        lists, one containing shares and the other containing the sharenums,
-        which is an int from 0 to num_shares-1. The get_share_size() method
-        can be used to determine the length of the 'sharedata' strings
-        returned by encode().
+        lists, one containing shares and the other containing the shareids.
+        The get_share_size() method can be used to determine the length of the
+        share strings returned by encode().

-        The sharedatas and their corresponding sharenums are required to be
-        kept together during storage and retrieval. Specifically, the share
-        data is useless by itself: the decoder needs to be told which share is
-        which by providing it with both the share number and the actual
-        share data.
+        The shares and their corresponding shareids are required to be kept
+        together during storage and retrieval. Specifically, the share data is
+        useless by itself: the decoder needs to be told which share is which
+        by providing it with both the shareid and the actual share data.

         The memory usage of this function is expected to be on the order of
-        total_shares * get_share_size().
+        (max_shares - required_shares) * get_share_size().
         """
-        # design note: we could embed the share number in the sharedata by
-        # returning bencode((sharenum,sharedata)). The advantage would be
-        # making it easier to keep these two pieces together, and probably
-        # avoiding a round trip when reading the remote bucket (although this
-        # could be achieved by changing RIBucketReader.read to
-        # read_data_and_metadata). The disadvantage is that the share number
-        # wants to be exposed to the storage/bucket layer (specifically to
-        # handle the next stage of peer-selection algorithm in which we
-        # propose to keep share#A on a given peer and they are allowed to
-        # tell us that they already have share#B). Also doing this would make
-        # the share size somewhat variable (one-digit sharenumbers will be a
-        # byte shorter than two-digit sharenumbers), unless we zero-pad the
-        # sharenumbers based upon the max_total_shares declared in
-        # set_params.

 class ICodecDecoder(Interface):
     def set_serialized_params(params):
@@ -167,17 +154,21 @@ class ICodecDecoder(Interface):
     def decode(some_shares, their_shareids):
         """Decode a partial list of shares into data.

-        'some_shares' is required to be a list of buffers of sharedata, a
+        'some_shares' is required to be a sequence of buffers of sharedata, a
         subset of the shares returned by ICodecEncode.encode(). Each share is
         required to be of the same length. The i'th element of their_shareids
-        is required to be the share id (or "share num") of the i'th buffer in
-        some_shares.
+        is required to be the shareid of the i'th buffer in some_shares.

         This returns a Deferred which fires with a sequence of buffers. This
         sequence will contain all of the segments of the original data, in
         order. The sum of the lengths of all of the buffers will be the
         'data_size' value passed into the original ICodecEncode.set_params()
-        call.
+        call. Note that some of the elements in the result sequence may be
+        references to the elements of the some_shares input sequence. In
+        particular, this means that if those share objects are mutable (e.g.
+        arrays) and if they are changed then both the input (the 'some_shares'
+        parameter) and the output (the value given when the deferred is
+        triggered) will change.

         The length of 'some_shares' is required to be exactly the value of
         'required_shares' passed into the original ICodecEncode.set_params()
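To make the contract concrete, here is the minimal shape an ICodecEncoder implementation takes under the new signature. 'NullEncoder' is hypothetical (a trivial k == m == 1 codec), a sketch rather than anything in this commit:

from zope.interface import implements
from twisted.internet import defer
from allmydata.interfaces import ICodecEncoder

class NullEncoder(object):
    implements(ICodecEncoder)
    ENCODER_TYPE = 0   # hypothetical type tag

    def set_params(self, data_size, required_shares, max_shares):
        assert required_shares == max_shares == 1
        self.data_size = data_size

    def get_encoder_type(self):
        return self.ENCODER_TYPE

    def get_serialized_params(self):
        return "%d" % self.data_size

    def get_share_size(self):
        return self.data_size

    def encode(self, inshares, desired_share_ids=None):
        # with k == m == 1, the single input buffer is the single share
        return defer.succeed((list(inshares), [0]))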

View File

@@ -3,71 +3,57 @@ import os, time
 from twisted.trial import unittest
 from twisted.internet import defer
 from twisted.python import log
-from allmydata.codec import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder
+from allmydata.codec import ReplicatingEncoder, ReplicatingDecoder, CRSEncoder, CRSDecoder
 import random
+from allmydata.util import mathutil

 class Tester:
-    #enc_class = PyRSEncoder
-    #dec_class = PyRSDecoder

     def do_test(self, size, required_shares, max_shares, fewer_shares=None):
-        data0 = os.urandom(size)
+        data0s = [os.urandom(mathutil.div_ceil(size, required_shares)) for i in range(required_shares)]
         enc = self.enc_class()
         enc.set_params(size, required_shares, max_shares)
         serialized_params = enc.get_serialized_params()
         log.msg("serialized_params: %s" % serialized_params)
-        d = enc.encode(data0)
-        def _done_encoding_all(shares):
+        d = enc.encode(data0s)
+        def _done_encoding_all((shares, shareids)):
             self.failUnlessEqual(len(shares), max_shares)
             self.shares = shares
+            self.shareids = shareids
         d.addCallback(_done_encoding_all)

         if fewer_shares is not None:
-            # also validate that the num_shares= parameter works
-            d.addCallback(lambda res: enc.encode(data0, fewer_shares))
-            def _check_fewer_shares(some_shares):
-                self.failUnlessEqual(len(some_shares), fewer_shares)
+            # also validate that the desired_shareids= parameter works
+            desired_shareids = random.sample(range(max_shares), fewer_shares)
+            d.addCallback(lambda res: enc.encode(data0s, desired_shareids))
+            def _check_fewer_shares((some_shares, their_shareids)):
+                self.failUnlessEqual(tuple(their_shareids), tuple(desired_shareids))
            d.addCallback(_check_fewer_shares)

-        def _decode(shares):
+        def _decode((shares, shareids)):
             dec = self.dec_class()
             dec.set_serialized_params(serialized_params)
-            d1 = dec.decode(shares)
+            d1 = dec.decode(shares, shareids)
             return d1

         def _check_data(decoded_shares):
-            data1 = "".join(decoded_shares)
-            self.failUnlessEqual(len(data1), len(data0))
-            self.failUnless(data1 == data0)
+            self.failUnlessEqual(len(decoded_shares), len(data0s))
+            self.failUnless(tuple(decoded_shares) == tuple(data0s))

-        def _decode_all_ordered(res):
-            log.msg("_decode_all_ordered")
-            # can we decode using all of the shares?
-            return _decode(self.shares)
-        d.addCallback(_decode_all_ordered)
-        d.addCallback(_check_data)
-
-        def _decode_all_shuffled(res):
-            log.msg("_decode_all_shuffled")
-            # can we decode, using all the shares, but in random order?
-            shuffled_shares = self.shares[:]
-            random.shuffle(shuffled_shares)
-            return _decode(shuffled_shares)
-        d.addCallback(_decode_all_shuffled)
-        d.addCallback(_check_data)
-
         def _decode_some(res):
             log.msg("_decode_some")
             # decode with a minimal subset of the shares
             some_shares = self.shares[:required_shares]
-            return _decode(some_shares)
+            some_shareids = self.shareids[:required_shares]
+            return _decode((some_shares, some_shareids))
         d.addCallback(_decode_some)
         d.addCallback(_check_data)

         def _decode_some_random(res):
             log.msg("_decode_some_random")
             # use a randomly-selected minimal subset
-            some_shares = random.sample(self.shares, required_shares)
-            return _decode(some_shares)
+            l = random.sample(zip(self.shares, self.shareids), required_shares)
+            some_shares = [ x[0] for x in l ]
+            some_shareids = [ x[1] for x in l ]
+            return _decode((some_shares, some_shareids))
         d.addCallback(_decode_some_random)
         d.addCallback(_check_data)
@@ -75,12 +61,17 @@ class Tester:
             log.msg("_decode_multiple")
             # make sure we can re-use the decoder object
-            shares1 = random.sample(self.shares, required_shares)
-            shares2 = random.sample(self.shares, required_shares)
+            sharesl1 = random.sample(zip(self.shares, self.shareids), required_shares)
+            shares1 = [ x[0] for x in sharesl1 ]
+            shareids1 = [ x[1] for x in sharesl1 ]
+            sharesl2 = random.sample(zip(self.shares, self.shareids), required_shares)
+            shares2 = [ x[0] for x in sharesl2 ]
+            shareids2 = [ x[1] for x in sharesl2 ]
             dec = self.dec_class()
             dec.set_serialized_params(serialized_params)
-            d1 = dec.decode(shares1)
+            d1 = dec.decode(shares1, shareids1)
             d1.addCallback(_check_data)
-            d1.addCallback(lambda res: dec.decode(shares2))
+            d1.addCallback(lambda res: dec.decode(shares2, shareids2))
             d1.addCallback(_check_data)
             return d1
         d.addCallback(_decode_multiple)
@@ -88,73 +79,25 @@ class Tester:
         return d

     def test_encode(self):
-        if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder:
-            raise unittest.SkipTest("slave3 is really slow")
         return self.do_test(1000, 25, 100)

     def test_encode1(self):
         return self.do_test(8, 8, 16)

     def test_encode2(self):
-        if os.uname()[1] == "slave3" and self.enc_class == PyRSEncoder:
-            raise unittest.SkipTest("slave3 is really slow")
         return self.do_test(123, 25, 100, 90)

     def test_sizes(self):
-        raise unittest.SkipTest("omg this would take forever")
         d = defer.succeed(None)
         for i in range(1, 100):
             d.addCallback(lambda res,size: self.do_test(size, 4, 10), i)
         return d

-class PyRS(unittest.TestCase, Tester):
-    enc_class = PyRSEncoder
-    dec_class = PyRSDecoder
-
 class Replicating(unittest.TestCase, Tester):
     enc_class = ReplicatingEncoder
     dec_class = ReplicatingDecoder

+class CRS(unittest.TestCase, Tester):
+    enc_class = CRSEncoder
+    dec_class = CRSDecoder

-class BenchPyRS(unittest.TestCase):
-    enc_class = PyRSEncoder
-    def test_big(self):
-        size = 10000
-        required_shares = 25
-        max_shares = 100
-        # this lets us use a persistent lookup table, stored outside the
-        # _trial_temp directory (which is deleted each time trial is run)
-        os.symlink("../ffield.lut.8", "ffield.lut.8")
-        enc = self.enc_class()
-        self.start()
-        enc.set_params(size, required_shares, max_shares)
-        serialized_params = enc.get_serialized_params()
-        print "encoder ready", self.stop()
-        self.start()
-        data0 = os.urandom(size)
-        print "data ready", self.stop()
-        self.start()
-        d = enc.encode(data0)
-        def _done(shares):
-            now_shares = time.time()
-            print "shares ready", self.stop()
-            self.start()
-            self.failUnlessEqual(len(shares), max_shares)
-        d.addCallback(_done)
-        d.addCallback(lambda res: enc.encode(data0))
-        d.addCallback(_done)
-        d.addCallback(lambda res: enc.encode(data0))
-        d.addCallback(_done)
-        return d
-
-    def start(self):
-        self.start_time = time.time()
-
-    def stop(self):
-        self.end_time = time.time()
-        return (self.end_time - self.start_time)
-
-# to benchmark the encoder, delete this line
-del BenchPyRS
-# and then run 'make test TEST=allmydata.test.test_encode_share.BenchPyRS'
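The deleted BenchPyRS benchmark gets no CRS counterpart in this commit. A hypothetical equivalent for the new encoder would look roughly like this (a sketch, assuming the same Deferred conventions as the tests above):

import os, sys, time
from allmydata.codec import CRSEncoder
from allmydata.util import mathutil

def bench_crs(size=10000, required_shares=25, max_shares=100):
    enc = CRSEncoder()
    enc.set_params(size, required_shares, max_shares)
    data0s = [os.urandom(mathutil.div_ceil(size, required_shares))
              for i in range(required_shares)]
    start = time.time()
    d = enc.encode(data0s)
    d.addCallback(lambda res: sys.stdout.write("encoded in %.3fs\n"
                                               % (time.time() - start)))
    return d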

View File

@@ -60,11 +60,8 @@ from twisted.trial.reporter import TreeReporter, VerboseTextReporter
 # finish in printSummary.

 from allmydata.util import figleaf
-# don't cover py_ecc, it takes forever
-from allmydata.py_ecc import rs_code
 import os
-py_ecc_dir = os.path.realpath(os.path.dirname(rs_code.__file__))
-figleaf.start(ignore_prefixes=[py_ecc_dir])
+figleaf.start()

 class FigleafReporter(TreeReporter):

View File

@@ -216,7 +216,7 @@ class FileUploader:
         assert sorted(self.sharemap.keys()) == range(len(landlords))
         # encode all the data at once: this class does not use segmentation
         data = self._filehandle.read()
-        d = self._encoder.encode(data, len(landlords))
+        d = self._encoder.encode(data, self.sharemap.keys())
         d.addCallback(self._send_all_shares)
         d.addCallback(lambda res: self._encoder.get_serialized_params())
         return d
@@ -229,17 +229,16 @@ class FileUploader:
                                   bucket.callRemote("close"))
         return d

-    def _send_all_shares(self, shares):
+    def _send_all_shares(self, (shares, shareids)):
         dl = []
-        for share in shares:
-            (sharenum,sharedata) = share
+        for (shareid, share) in zip(shareids, shares):
             if self.debug:
-                log.msg(" writing share %d" % sharenum)
-            metadata = bencode.bencode(sharenum)
-            assert len(sharedata) == self._share_size
-            assert isinstance(sharedata, str)
-            bucket = self.sharemap[sharenum]
-            d = self._send_one_share(bucket, sharedata, metadata)
+                log.msg(" writing share %d" % shareid)
+            metadata = bencode.bencode(shareid)
+            assert len(share) == self._share_size
+            assert isinstance(share, str)
+            bucket = self.sharemap[shareid]
+            d = self._send_one_share(bucket, share, metadata)
             dl.append(d)
         return DeferredListShouldSucceed(dl)
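The invariant _send_all_shares depends on: encode() returns shares in the same order as the shareids it was asked for, so zip() re-pairs each share with its bucket. Illustrated with stand-in data (the names here are hypothetical):

sharemap = {0: "bucket-A", 3: "bucket-B", 7: "bucket-C"}  # shareid -> bucket
desired_ids = sharemap.keys()
shares = ["share-for-%d" % i for i in desired_ids]  # stand-in encoder output
for shareid, share in zip(desired_ids, shares):
    print "writing %s to %s" % (share, sharemap[shareid])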

View File

View File

@@ -1,764 +0,0 @@
# Copyright Emin Martinian 2002. See below for license terms.
# Version Control Info: $Id: ffield.py,v 1.10 2003/10/28 21:19:43 emin Exp $

"""
This package contains the FField class designed to perform calculations
in finite fields of characteristic two. The following docstrings provide
detailed information on various topics:

FField.__doc__    Describes the methods of the FField class and how
                  to use them.

FElement.__doc__  Describes the FElement class and how to use it.

fields_doc        Briefly describes what a finite field is and
                  establishes notation for further documentation.

design_doc        Discusses the design of the FField class and attempts
                  to justify why certain decisions were made.

license_doc       Describes the license and lack of warranty for
                  this code.

testing_doc       Describes some tests to make sure the code is working
                  as well as some of the built in testing routines.
"""

import string, random, os, os.path, cPickle

# The following list of primitive polynomials are the Conway Polynomials
# from the list at
# http://www.math.rwth-aachen.de/~Frank.Luebeck/ConwayPol/cp2.html

gPrimitivePolys = {}
gPrimitivePolysCondensed = {
    1 : (1,0),
    2 : (2,1,0),
    3 : (3,1,0),
    4 : (4,1,0),
    5 : (5,2,0),
    6 : (6,4,3,1,0),
    7 : (7,1,0),
    8 : (8,4,3,2,0),
    9 : (9,4,0),
    10 : (10,6,5,3,2,1,0),
    11 : (11,2,0),
    12 : (12,7,6,5,3,1,0),
    13 : (13,4,3,1,0),
    14 : (14,7,5,3,0),
    15 : (15,5,4,2,0),
    16 : (16,5,3,2,0),
    17 : (17,3,0),
    18 : (18,12,10,1,0),
    19 : (19,5,2,1,0),
    20 : (20,10,9,7,6,5,4,1,0),
    21 : (21,6,5,2,0),
    22 : (22,12,11,10,9,8,6,5,0),
    23 : (23,5,0),
    24 : (24,16,15,14,13,10,9,7,5,3,0),
    25 : (25,8,6,2,0),
    26 : (26,14,10,8,7,6,4,1,0),
    27 : (27,12,10,9,7,5,3,2,0),
    28 : (28,13,7,6,5,2,0),
    29 : (29,2,0),
    30 : (30,17,16,13,11,7,5,3,2,1,0),
    31 : (31,3,0),
    32 : (32,15,9,7,4,3,0),
    33 : (33,13,12,11,10,8,6,3,0),
    34 : (34,16,15,12,11,8,7,6,5,4,2,1,0),
    35 : (35, 11, 10, 7, 5, 2, 0),
    36 : (36, 23, 22, 20, 19, 17, 14, 13, 8, 6, 5, 1, 0),
    37 : (37, 5, 4, 3, 2, 1, 0),
    38 : (38, 14, 10, 9, 8, 5, 2, 1, 0),
    39 : (39, 15, 12, 11, 10, 9, 7, 6, 5, 2 , 0),
    40 : (40, 23, 21, 18, 16, 15, 13, 12, 8, 5, 3, 1, 0),
    97 : (97,6,0),
    100 : (100,15,0)
    }

for n in gPrimitivePolysCondensed.keys():
    gPrimitivePolys[n] = [0]*(n+1)
    if (n < 16):
        unity = 1
    else:
        unity = long(1)
    for index in gPrimitivePolysCondensed[n]:
        gPrimitivePolys[n][index] = unity

class FField:
    """
    The FField class implements a finite field calculator. The
    following functions are provided:

    __init__
    Add
    Subtract
    Multiply
    Inverse
    Divide
    FindDegree
    MultiplyWithoutReducing
    ExtendedEuclid
    FullDivision
    ShowCoefficients
    ShowPolynomial
    GetRandomElement
    ConvertListToElement
    TestFullDivision
    TestInverse

    Most of these methods take integers or longs representing field
    elements as arguments and return integers representing the desired
    field elements as output. See ffield.fields_doc for an explanation
    of the integer representation of field elements.

    Example of how to use the FField class:

    >>> import ffield
    >>> F = ffield.FField(5) # create the field GF(2^5)
    >>> a = 7 # field elements are denoted as integers from 0 to 2^5-1
    >>> b = 15
    >>> F.ShowPolynomial(a) # show the polynomial representation of a
    'x^2 + x^1 + 1'
    >>> F.ShowPolynomial(b)
    'x^3 + x^2 + x^1 + 1'
    >>> c = F.Multiply(a,b) # multiply a and b modulo the field generator
    >>> c
    4
    >>> F.ShowPolynomial(c)
    'x^2'
    >>> F.Multiply(c,F.Inverse(a)) == b # verify multiplication works
    1
    >>> F.Multiply(c,F.Inverse(b)) == a # verify multiplication works
    1
    >>> d = F.Divide(c,b) # since c = F.Multiply(a,b), d should give a
    >>> d
    7

    See documentation on the appropriate method for further details.
    """

    def __init__(self,n,gen=0,useLUT=-1):
        """
        This method constructs the field GF(2^p). It takes one
        required argument, n = p, and two optional arguments: gen,
        representing the coefficients of the generator polynomial
        (of degree n) to use, and useLUT, describing whether to use
        a lookup table. If no gen argument is provided, the
        Conway Polynomial of degree n is obtained from the table
        gPrimitivePolys.

        If useLUT = 1 then a lookup table is used for
        computing finite field multiplies and divides.
        If useLUT = 0 then no lookup table is used.
        If useLUT = -1 (the default), then the code
        decides when a lookup table should be used.

        Note that you can look at the generator for the field object
        F by looking at F.generator.
        """
        self.n = n
        if (gen):
            self.generator = gen
        else:
            self.generator = self.ConvertListToElement(gPrimitivePolys[n])

        if (useLUT == 1 or (useLUT == -1 and self.n < 10)): # use lookup table
            self.unity = 1
            self.Inverse = self.DoInverseForSmallField
            self.PrepareLUT()
            self.Multiply = self.LUTMultiply
            self.Divide = self.LUTDivide
            self.Inverse = lambda x: self.LUTDivide(1,x)
        elif (self.n < 15):
            self.unity = 1
            self.Inverse = self.DoInverseForSmallField
            self.Multiply = self.DoMultiply
            self.Divide = self.DoDivide
        else: # Need to use longs for larger fields
            self.unity = long(1)
            self.Inverse = self.DoInverseForBigField
            self.Multiply = lambda a,b: self.DoMultiply(long(a),long(b))
            self.Divide = lambda a,b: self.DoDivide(long(a),long(b))

    def PrepareLUT(self):
        fieldSize = 1 << self.n
        lutName = 'ffield.lut.' + `self.n`
        if (os.path.exists(lutName)):
            fd = open(lutName,'r')
            self.lut = cPickle.load(fd)
            fd.close()
        else:
            self.lut = LUT()
            self.lut.mulLUT = range(fieldSize)
            self.lut.divLUT = range(fieldSize)
            self.lut.mulLUT[0] = [0]*fieldSize
            self.lut.divLUT[0] = ['NaN']*fieldSize
            for i in range(1,fieldSize):
                self.lut.mulLUT[i] = map(lambda x: self.DoMultiply(i,x),
                                         range(fieldSize))
                self.lut.divLUT[i] = map(lambda x: self.DoDivide(i,x),
                                         range(fieldSize))
            fd = open(lutName,'w')
            cPickle.dump(self.lut,fd)
            fd.close()

    def LUTMultiply(self,i,j):
        return self.lut.mulLUT[i][j]

    def LUTDivide(self,i,j):
        return self.lut.divLUT[i][j]

    def Add(self,x,y):
        """
        Adds two field elements and returns the result.
        """
        return x ^ y

    def Subtract(self,x,y):
        """
        Subtracts the second argument from the first and returns
        the result. In fields of characteristic two this is the same
        as the Add method.
        """
        return self.Add(x,y)

    def DoMultiply(self,f,v):
        """
        Multiplies two field elements (modulo the generator
        self.generator) and returns the result.

        See MultiplyWithoutReducing if you don't want multiplication
        modulo self.generator.
        """
        m = self.MultiplyWithoutReducing(f,v)
        return self.FullDivision(m,self.generator,
                                 self.FindDegree(m),self.n)[1]

    def DoInverseForSmallField(self,f):
        """
        Computes the multiplicative inverse of its argument and
        returns the result.
        """
        return self.ExtendedEuclid(1,f,self.generator,
                                   self.FindDegree(f),self.n)[1]

    def DoInverseForBigField(self,f):
        """
        Computes the multiplicative inverse of its argument and
        returns the result.
        """
        return self.ExtendedEuclid(self.unity,long(f),self.generator,
                                   self.FindDegree(long(f)),self.n)[1]

    def DoDivide(self,f,v):
        """
        Divide(f,v) returns f * v^-1.
        """
        return self.DoMultiply(f,self.Inverse(v))

    def FindDegree(self,v):
        """
        Find the degree of the polynomial representing the input field
        element v. This takes O(degree(v)) operations.

        A faster version requiring only O(log(degree(v)))
        could be written using binary search...
        """
        if (v):
            result = -1
            while(v):
                v = v >> 1
                result = result + 1
            return result
        else:
            return 0

    def MultiplyWithoutReducing(self,f,v):
        """
        Multiplies two field elements and does not take the result
        modulo self.generator. You probably should not use this
        unless you know what you are doing; look at Multiply instead.

        NOTE: If you are using fields larger than GF(2^15), you should
        make sure that f and v are longs not integers.
        """
        result = 0
        mask = self.unity
        i = 0
        while (i <= self.n):
            if (mask & v):
                result = result ^ f
            f = f << 1
            mask = mask << 1
            i = i + 1
        return result

    def ExtendedEuclid(self,d,a,b,aDegree,bDegree):
        """
        Takes arguments (d,a,b,aDegree,bDegree) where d = gcd(a,b)
        and returns the result of the extended Euclid algorithm
        on (d,a,b).
        """
        if (b == 0):
            return (a,self.unity,0)
        else:
            (floorADivB, aModB) = self.FullDivision(a,b,aDegree,bDegree)
            (d,x,y) = self.ExtendedEuclid(d, b, aModB, bDegree,
                                          self.FindDegree(aModB))
            return (d,y,self.Subtract(x,self.DoMultiply(floorADivB,y)))

    def FullDivision(self,f,v,fDegree,vDegree):
        """
        Takes four arguments, f, v, fDegree, and vDegree where
        fDegree and vDegree are the degrees of the field elements
        f and v represented as polynomials.
        This method returns the field elements a and b such that
        f(x) = a(x) * v(x) + b(x).

        That is, a is the divisor and b is the remainder, or in
        other words a is like floor(f/v) and b is like f modulo v.
        """
        result = 0
        i = fDegree
        mask = self.unity << i
        while (i >= vDegree):
            if (mask & f):
                result = result ^ (self.unity << (i - vDegree))
                f = self.Subtract(f, v << (i - vDegree))
            i = i - 1
            mask = mask >> self.unity
        return (result,f)

    def ShowCoefficients(self,f):
        """
        Show coefficients of input field element represented as a
        polynomial in decreasing order.
        """
        fDegree = self.n

        result = []
        for i in range(fDegree,-1,-1):
            if ((self.unity << i) & f):
                result.append(1)
            else:
                result.append(0)
        return result

    def ShowPolynomial(self,f):
        """
        Show input field element represented as a polynomial.
        """
        fDegree = self.FindDegree(f)
        result = ''

        if (f == 0):
            return '0'

        for i in range(fDegree,0,-1):
            if ((1 << i) & f):
                result = result + (' x^' + `i`)
        if (1 & f):
            result = result + ' ' + `1`
        return string.replace(string.strip(result),' ',' + ')

    def GetRandomElement(self,nonZero=0,maxDegree=None):
        """
        Return an element from the field chosen uniformly at random
        or, if the optional argument nonZero is true, chosen uniformly
        at random from the non-zero elements, or, if the optional argument
        maxDegree is provided, ensure that the result has degree less
        than maxDegree.
        """
        if (None == maxDegree):
            maxDegree = self.n
        if (maxDegree <= 1 and nonZero):
            return 1
        if (maxDegree < 31):
            return random.randint(nonZero != 0,(1<<maxDegree)-1)
        else:
            result = 0L
            for i in range(0,maxDegree):
                result = result ^ (random.randint(0,1) << long(i))
            if (nonZero and result == 0):
                return self.GetRandomElement(1)
            else:
                return result

    def ConvertListToElement(self,l):
        """
        This method takes as input a binary list (e.g. [1, 0, 1, 1])
        and converts it to a decimal representation of a field element.
        For example, [1, 0, 1, 1] is mapped to 8 | 2 | 1 = 11.

        Note if the input list is of degree >= to the degree of the
        generator for the field, then you will have to take the
        result modulo the generator to get a proper element in the
        field.
        """
        temp = map(lambda a, b: a << b, l, range(len(l)-1,-1,-1))
        return reduce(lambda a, b: a | b, temp)

    def TestFullDivision(self):
        """
        Test the FullDivision function by generating random polynomials
        a(x) and b(x) and checking whether (c,d) == FullDivision(a,b)
        satisfies b*c + d == a
        """
        f = 0
        a = self.GetRandomElement(nonZero=1)
        b = self.GetRandomElement(nonZero=1)
        aDegree = self.FindDegree(a)
        bDegree = self.FindDegree(b)

        (c,d) = self.FullDivision(a,b,aDegree,bDegree)
        recon = self.Add(d, self.Multiply(c,b))
        assert (recon == a), ('TestFullDivision failed: a='
                              + `a` + ', b=' + `b` + ', c='
                              + `c` + ', d=' + `d` + ', recon=', recon)

    def TestInverse(self):
        """
        This function tests the Inverse function by generating
        a random non-zero polynomial a(x) and checking if
        a * Inverse(a) == 1.
        """
        a = self.GetRandomElement(nonZero=1)
        aInv = self.Inverse(a)
        prod = self.Multiply(a,aInv)
        assert 1 == prod, ('TestInverse failed:' + 'a=' + `a` + ', aInv='
                           + `aInv` + ', prod=' + `prod`)

class LUT:
    """
    Lookup table used to speed up some finite field operations.
    """
    pass

class FElement:
    """
    This class provides field elements which overload the
    +,-,*,%,//,/ operators to be the appropriate field operation.
    Note that before creating FElement objects you must first
    create an FField object. For example,

    >>> import ffield
    >>> F = FField(5)
    >>> e1 = FElement(F,7)
    >>> e1
    x^2 + x^1 + 1
    >>> e2 = FElement(F,19)
    >>> e2
    x^4 + x^1 + 1
    >>> e3 = e1 + e2
    >>> e3
    x^4 + x^2
    >>> e4 = e3 / e2
    >>> e4
    x^4 + x^3 + x^2 + x^1 + 1
    >>> e4 * e2 == (e3)
    1
    """

    def __init__(self,field,e):
        """
        The constructor takes two arguments, field and e, where
        field is an FField object and e is an integer representing
        an element in FField.

        The result is a new FElement instance.
        """
        self.f = e
        self.field = field

    def __add__(self,other):
        assert self.field == other.field
        return FElement(self.field,self.field.Add(self.f,other.f))

    def __mul__(self,other):
        assert self.field == other.field
        return FElement(self.field,self.field.Multiply(self.f,other.f))

    def __mod__(self,o):
        assert self.field == o.field
        return FElement(self.field,
                        self.field.FullDivision(self.f,o.f,
                                                self.field.FindDegree(self.f),
                                                self.field.FindDegree(o.f))[1])

    def __floordiv__(self,o):
        assert self.field == o.field
        return FElement(self.field,
                        self.field.FullDivision(self.f,o.f,
                                                self.field.FindDegree(self.f),
                                                self.field.FindDegree(o.f))[0])

    def __div__(self,other):
        assert self.field == other.field
        return FElement(self.field,self.field.Divide(self.f,other.f))

    def __str__(self):
        return self.field.ShowPolynomial(self.f)

    def __repr__(self):
        return self.__str__()

    def __eq__(self,other):
        assert self.field == other.field
        return self.f == other.f

def FullTest(testsPerField=10,sizeList=None):
    """
    This function runs TestInverse and TestFullDivision for testsPerField
    random field elements for each field size in sizeList. For example,
    if sizeList = (1,5,7), then tests are run on GF(2), GF(2^5), and
    GF(2^7). If sizeList == None (which is the default), then every
    field is tested.
    """
    if (None == sizeList):
        sizeList = gPrimitivePolys.keys()
    for i in sizeList:
        F = FField(i)
        for j in range(testsPerField):
            F.TestInverse()
            F.TestFullDivision()

fields_doc = """
Roughly speaking a finite field is a finite collection of elements
where most of the familiar rules of math work. Specifically, you
can add, subtract, multiply, and divide elements of a field and
continue to get elements in the field. This is useful because
computers usually store and send information in fixed size chunks.
Thus many useful algorithms can be described as elementary operations
(e.g. addition, subtraction, multiplication, and division) of these chunks.

Currently this package only deals with fields of characteristic 2. That
is, all fields we consider have exactly 2^p elements for some integer p.
We denote such fields as GF(2^p) and work with the elements represented
as p-1 degree polynomials in the indeterminate x. That is, an element of
the field GF(2^p) looks something like

f(x) = c_{p-1} x^{p-1} + c_{p-2} x^{p-2} + ... + c_0

where the coefficients c_i are in binary.

Addition is performed by simply adding coefficients of degree i
modulo 2. For example, if we have two field elements f and v
represented as f(x) = x^2 + 1 and v(x) = x + 1 then s = f + v
is given by (x^2 + 1) + (x + 1) = x^2 + x. Multiplication is
performed modulo a p degree generator polynomial g(x).
For example, if f and v are as in the above example, then s = f * v
is given by (x^2 + 1) * (x + 1) mod g(x). Subtraction turns out
to be the same as addition for fields of characteristic 2. Division
is defined as f / v = f * v^-1 where v^-1 is the multiplicative
inverse of v. Multiplicative inverses in groups and fields
can be calculated using the extended Euclid algorithm.

Roughly speaking, the intuition for why multiplication is
performed modulo g(x) is that we want to make sure s * v
returns an element in the field. Elements of the field are
polynomials of degree p-1, but regular multiplication could
yield terms of degree greater than p-1. Therefore we need a
rule for 'reducing' terms of degree p or greater back down
to terms of degree at most p-1. The 'reduction rule' is
taking things modulo g(x).

For another way to think of
taking things modulo g(x) as a 'reduction rule', imagine
g(x) = x^7 + x + 1 and we want to take some polynomial,
f(x) = x^8 + x^3 + x, modulo g(x). We can think of g(x)
as telling us that we can replace every occurrence of
x^7 with x + 1. Thus f(x) becomes x * x^7 + x^3 + x which
becomes x * (x + 1) + x^3 + x = x^3 + x^2 . Essentially, taking
polynomials mod g(x) by replacing all x^7 terms with x + 1 will
force down the degree of f(x) until it is below 7 (the leading power
of g(x)). See a book on abstract algebra for more details.
"""
design_doc = """
The FField class implements a finite field calculator for fields of
characteristic two. This uses a representation of field elements
as integers and has various methods to calculate the result of
adding, subtracting, multiplying, dividing, etc. field elements
represented AS INTEGERS OR LONGS.

The FElement class provides objects which act like a new kind of
numeric type (i.e. they overload the +,-,*,%,//,/ operators, and
print themselves as polynomials instead of integers).

Use the FField class for efficient storage and calculation.
Use the FElement class if you want to play around with finite
field math the way you would in something like Matlab or
Mathematica.

--------------------------------------------------------------------
                           WHY PYTHON?

You may wonder why a finite field calculator written in Python would
be useful considering all the C/C++/Java code already written to do
the same thing (and probably faster too). The goals of this project
are as follows; please keep them in mind if you make changes:

o Provide an easy to understand implementation of field operations.

  Python lends itself well to comments and documentation. Hence,
  we hope that in addition to being useful by itself, this project
  will make it easier for people to implement finite field
  computations in other languages. If you've ever looked at some
  of the highly optimized finite field code written in C, you will
  understand the need for a clear reference implementation of such
  operations.

o Provide easy access to a finite field calculator.

  Since you can just start up the Python interpreter and do
  computations, a finite field calculator in Python lets you try
  things out, check your work for other algorithms, etc.
  Furthermore, since a wealth of numerical packages exist for Python,
  you can easily write simulations or algorithms which draw upon
  such routines with finite fields.

o Provide a platform independent framework for coding in Python.

  Many useful error control codes can be implemented based on
  finite fields. Some examples include error/erasure correction,
  cyclic redundancy checks (CRCs), and secret sharing. Since
  Python has a number of other useful Internet features, being able
  to implement these kinds of codes makes Python a better framework
  for network programming.

o Leverage Python arbitrary precision code for large fields.

  If you want to do computations over very large fields, for example
  GF(2^p) with p > 31, you have to write lots of ugly bit field
  code in most languages. Since Python has built in support for
  arbitrary precision integers, you can make this code work for
  arbitrary field sizes provided you operate on longs instead of
  ints. That is, if you give as input numbers like
  0L, 1L, 1L << 55, etc., most of the code should work.

--------------------------------------------------------------------
                           BASIC DESIGN

The basic idea is to index entries in the finite field of interest
using integers and design the class methods to work properly on this
representation. Using integers is efficient since integers are easy
to store and manipulate, and allows us to handle arbitrary field sizes
without changing the code if we instead switch to using longs.

Specifically, an integer represents a bit string

c = c_{p-1} c_{p-2} ... c_0

which we interpret as the coefficients of a polynomial representing a
field element

f(x) = c_{p-1} x^{p-1} + c_{p-2} x^{p-2} + ... + c_0.

--------------------------------------------------------------------
                              FUTURE

In the future, support for fields of other
characteristic may be added (if people want them). Since computers
have built in parallelized operations for fields of characteristic
two (i.e. bitwise and, or, xor, etc.), this implementation uses
such operations to make most of the computations efficient.
"""

license_doc = """
This code was originally written by Emin Martinian (emin@allegro.mit.edu).
You may copy, modify, redistribute in source or binary form as long
as credit is given to the original author. Specifically, please
include some kind of comment or docstring saying that Emin Martinian
was one of the original authors. Also, if you publish anything based
on this work, it would be nice to cite the original author and any
other contributors.

There is NO WARRANTY for this software just as there is no warranty
for GNU software (although this is not GNU software). Specifically
we adopt the same policy towards warranties as the GNU project:

BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM 'AS IS' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
"""

testing_doc = """
The FField class has a number of built in testing functions such as
TestFullDivision and TestInverse. The simplest thing to
do is to call the FullTest method.

>>> import ffield
>>> ffield.FullTest(sizeList=None,testsPerField=100)

# To decrease the testing time you can either decrease the testsPerField
# or you can only test the field sizes you care about by doing something
# like sizeList = [2,7,20] in the ffield.FullTest command above.

If any problems occur, assertion errors are raised. Otherwise
nothing is returned. Note that you can also use the doctest
package to test all the Python examples in the documentation
by typing 'python ffield.py' or 'python -v ffield.py' at the
command line.
"""

# The following code is used to make the doctest package
# check examples in docstrings.

__test__ = {
    'testing_doc' : testing_doc
}

def _test():
    import doctest, ffield
    return doctest.testmod(ffield)

if __name__ == "__main__":
    print 'Starting automated tests (this may take a while)'
    _test()
    print 'Tests passed.'
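A worked instance of the arithmetic described in fields_doc, using this module's own FField class (note that constructing FField(5) caches a small 'ffield.lut.5' file in the current directory, per the __init__ docstring):

F = FField(5)              # GF(2^5)
f, v = 5, 3                # f(x) = x^2 + 1, v(x) = x + 1
s = F.Add(f, v)            # addition is coefficient-wise XOR
assert s == 6              # x^2 + x
assert F.ShowPolynomial(s) == 'x^2 + x^1'
assert F.Multiply(F.Inverse(v), F.Multiply(f, v)) == f   # round trip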

View File

@ -1,218 +0,0 @@
# Copyright Emin Martinian 2002. See below for license terms.
# Version Control Info: $Id: file_ecc.py,v 1.4 2003/10/28 21:28:29 emin Exp $
__doc__ = """
This package implements an erasure correction code for files.
Specifically it lets you take a file F and break it into N
pieces (which are named F.p_0, F.p_1, ..., F.p_N-1) such that
F can be recovered from any K pieces. Since the size of each
piece is F/K (plus some small header information).
How is this better than simply repeating copies of a file?
Firstly, this package lets you get finer grained
redunancy control since producing a duplicate copy of a file
requires at least 100% redundancy while this package lets you
expand the redunancy by n/k (e.g. if n=11, k=10 only 10%
redundancy is added).
Secondly, using a Reed-Solomon code, as this package does, gives
better loss resistance. For example, assume you just
divided a file F into 4 pieces, F.1, F.2, ..., F.4, duplicated
each piece, and placed each copy on a different disk. If the
two disks containing the copies of F.1 go down, then you can no
longer recover F.
With the Reed-Solomon code used in this package, if you use n=8, k=4
you divide F into 8 pieces such that recovery is possible as long as
at least 4 pieces are available. Thus if you placed each piece on a
separate disk, you could recover the data even if any combination of
up to 4 disks failed.
The docstrings for the functions EncodeFile and DecodeFiles
provide detailed information on usage and the docstring
license_doc describes the license and lack of warranty.
The following is an example of how to use this file:
>>> import file_ecc
>>> testFile = '/bin/ls' # A reasonable size file for testing.
>>> prefix = '/tmp/ls_backup' # Prefix for shares of file.
>>> names = file_ecc.EncodeFile(testFile,prefix,15,11) # break into N=15 pieces
# Imagine that only pieces [0,1,5,4,13,8,9,10,11,12,14] are available.
>>> decList = map(lambda x: prefix + '.p_' + `x`,[0,1,5,4,13,8,9,10,11,12,14])
>>> decodedFile = '/tmp/ls.r' # Choose where we want reconstruction to go.
>>> file_ecc.DecodeFiles(decList,decodedFile)
>>> fd1 = open(testFile,'rb')
>>> fd2 = open(decodedFile,'rb')
>>> fd1.read() == fd2.read()
1
"""
from rs_code import RSCode
from array import array
import os, struct, string
headerSep = '|'
def GetFileSize(fname):
return os.stat(fname)[6]
def MakeHeader(fname,n,k,size):
return string.join(['RS_PARITY_PIECE_HEADER','FILE',fname,
'n',`n`,'k',`k`,'size',`size`,'piece'],
headerSep) + headerSep
def ParseHeader(header):
return string.split(header,headerSep)
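# A small sketch of the header round trip; the file name and size
# below are invented for illustration. Each piece file begins with
# this header followed by its piece number and a newline.
def _header_format_sketch():
    hdr = MakeHeader('/bin/ls', 15, 11, 18072)
    # hdr is now:
    # 'RS_PARITY_PIECE_HEADER|FILE|/bin/ls|n|15|k|11|size|18072|piece|'
    fields = ParseHeader(hdr)
    assert fields[0] == 'RS_PARITY_PIECE_HEADER'
    assert int(fields[4]) == 15 and int(fields[6]) == 11
    return hdr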
def ReadEncodeAndWriteBlock(readSize,inFD,outFD,code):
buffer = array('B')
buffer.fromfile(inFD,readSize)
for i in range(readSize,code.k):
buffer.append(0)
codeVec = code.Encode(buffer)
for j in range(code.n):
outFD[j].write(struct.pack('B',codeVec[j]))
def EncodeFile(fname,prefix,n,k):
"""
Function: EncodeFile(fname,prefix,n,k)
Description: Encodes the file named by fname into n pieces named
prefix.p_0, prefix.p_1, ..., prefix.p_n-1. At least
k of these pieces are needed for recovering fname.
Each piece is roughly the size of fname / k (there
is a very small overhead due to some header information).
Returns a list containing names of files for the pieces.
Note n and k must satisfy 0 < k < n < 257.
Use the DecodeFiles function for decoding.
"""
fileList = []
if (n > 256 or k >= n or k <= 0):
raise Exception, 'Invalid (n,k), need 0 < k < n < 257.'
inFD = open(fname,'rb')
inSize = GetFileSize(fname)
header = MakeHeader(fname,n,k,inSize)
code = RSCode(n,k,8,shouldUseLUT=-(k!=1)) # -(k!=1) is -1 (auto) unless k == 1 (no LUT)
outFD = range(n)
for i in range(n):
outFileName = prefix + '.p_' + `i`
fileList.append(outFileName)
outFD[i] = open(outFileName,'wb')
outFD[i].write(header + `i` + '\n')
if (k == 1): # just doing repetition coding
    data = inFD.read(1024)
    while (data):
        for fd in outFD:
            fd.write(data)
        data = inFD.read(1024)
else: # do the full-blown RS encoding
for i in range(0, (inSize/k)*k,k):
ReadEncodeAndWriteBlock(k,inFD,outFD,code)
if ((inSize % k) > 0):
ReadEncodeAndWriteBlock(inSize % k,inFD,outFD,code)
return fileList
def ExtractPieceNums(fnames,headers):
l = range(len(fnames))
pieceNums = range(len(fnames))
for i in range(len(fnames)):
l[i] = ParseHeader(headers[i])
for i in range(len(fnames)):
if (l[i][0] != 'RS_PARITY_PIECE_HEADER' or
l[i][2] != l[0][2] or l[i][4] != l[0][4] or
l[i][6] != l[0][6] or l[i][8] != l[0][8]):
raise Exception, 'File ' + `fnames[i]` + ' has incorrect header.'
pieceNums[i] = int(l[i][10])
(n,k,size) = (int(l[0][4]),int(l[0][6]),long(l[0][8]))
if (len(pieceNums) < k):
raise Exception, ('Not enough parity for decoding; needed '
+ `l[0][6]` + ' got ' + `len(fnames)` + '.')
return (n,k,size,pieceNums)
def ReadDecodeAndWriteBlock(writeSize,inFDs,outFD,code):
buffer = array('B')
for j in range(code.k):
buffer.fromfile(inFDs[j],1)
result = code.Decode(buffer.tolist())
for j in range(writeSize):
outFD.write(struct.pack('B',result[j]))
def DecodeFiles(fnames,outName):
"""
Function: DecodeFiles(fnames,outName)
Description: Takes pieces of a file created using EncodeFiles and
recovers the original file placing it in outName.
The argument fnames must be a list of at least k
file names generated using EncodeFiles.
"""
inFDs = range(len(fnames))
headers = range(len(fnames))
for i in range(len(fnames)):
inFDs[i] = open(fnames[i],'rb')
headers[i] = inFDs[i].readline()
(n,k,inSize,pieceNums) = ExtractPieceNums(fnames,headers)
outFD = open(outName,'wb')
code = RSCode(n,k,8)
decList = pieceNums[0:k]
code.PrepareDecoder(decList)
for i in range(0, (inSize/k)*k,k):
ReadDecodeAndWriteBlock(k,inFDs,outFD,code)
if ((inSize%k)>0):
ReadDecodeAndWriteBlock(inSize%k,inFDs,outFD,code)
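# A minimal end-to-end sketch (paths and contents invented for
# illustration): encode a small file into n=5 pieces, discard two of
# them, and recover the original from the remaining k=3.
def _roundtrip_sketch():
    srcName = '/tmp/ecc_demo_src'
    outName = '/tmp/ecc_demo_out'
    fd = open(srcName, 'wb')
    fd.write('hello erasure coding! ' * 100)
    fd.close()
    names = EncodeFile(srcName, srcName, 5, 3)
    survivors = [names[0], names[2], names[4]] # any 3 of the 5 pieces
    DecodeFiles(survivors, outName)
    assert open(srcName, 'rb').read() == open(outName, 'rb').read()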
license_doc = """
This code was originally written by Emin Martinian (emin@allegro.mit.edu).
You may copy, modify, redistribute in source or binary form as long
as credit is given to the original author. Specifically, please
include some kind of comment or docstring saying that Emin Martinian
was one of the original authors. Also, if you publish anything based
on this work, it would be nice to cite the original author and any
other contributors.
There is NO WARRANTY for this software just as there is no warranty
for GNU software (although this is not GNU software). Specifically
we adopt the same policy towards warranties as the GNU project:
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM 'AS IS' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
"""
# The following code is used to make the doctest package
# check examples in docstrings.
def _test():
import doctest, file_ecc
return doctest.testmod(file_ecc)
if __name__ == "__main__":
_test()
print 'Tests passed'

src/py_ecc/genericmatrix.py

@@ -1,869 +0,0 @@
# Copyright Emin Martinian 2002. See below for license terms.
# Version Control Info: $Id: genericmatrix.py,v 1.7 2003/10/28 21:18:41 emin Exp $
"""
This package implements the GenericMatrix class to provide matrix
operations for any type that supports the multiply, add, subtract,
and divide operators. For example, this package can be used to
do matrix calculations over finite fields using the ffield package
available at http://martinian.com.
The following docstrings provide detailed information on various topics:
GenericMatrix.__doc__ Describes the methods of the GenericMatrix
class and how to use them.
license_doc Describes the license and lack of warranty
for this code.
testing_doc Describes some tests to make sure the code works.
"""
import operator
class GenericMatrix:
"""
The GenericMatrix class implements a matrix that works with
any generic type supporting addition, subtraction, multiplication,
and division. Matrix multiplication, addition, and subtraction
are implemented as are methods for finding inverses,
LU (actually LUP) decompositions, and determinants. A complete
list of user callable methods is:
__init__
__repr__
__mul__
__add__
__sub__
__setitem__
__getitem__
Size
SetRow
GetRow
GetColumn
Copy
MakeSimilarMatrix
SwapRows
MulRow
AddRow
AddCol
MulAddRow
LeftMulColumnVec
LowerGaussianElim
Inverse
Determinant
LUP
A quick and dirty example of how to use the GenericMatrix class
for matrices of floats is provided below.
>>> import genericmatrix
>>> v = genericmatrix.GenericMatrix((3,3))
>>> v.SetRow(0,[0.0, -1.0, 1.0])
>>> v.SetRow(1,[1.0, 1.0, 1.0])
>>> v.SetRow(2,[1.0, 1.0, -1.0])
>>> v
<matrix
0.0 -1.0 1.0
1.0 1.0 1.0
1.0 1.0 -1.0>
>>> vi = v.Inverse()
>>> vi
<matrix
1.0 0.0 1.0
-1.0 0.5 -0.5
-0.0 0.5 -0.5>
>>> (vi * v) - v.MakeSimilarMatrix(v.Size(),'i')
<matrix
0.0 0.0 0.0
0.0 0.0 0.0
0.0 0.0 0.0>
# See what happens when we try to invert a non-invertible matrix
>>> v[0,1] = 0.0
>>> v
<matrix
0.0 0.0 1.0
1.0 1.0 1.0
1.0 1.0 -1.0>
>>> abs(v.Determinant())
0.0
>>> v.Inverse()
Traceback (most recent call last):
...
ValueError: matrix not invertible
# LUP decomposition will still work even if Inverse() won't.
>>> (l,u,p) = v.LUP()
>>> l
<matrix
1.0 0.0 0.0
0.0 1.0 0.0
1.0 0.0 1.0>
>>> u
<matrix
1.0 1.0 1.0
0.0 0.0 1.0
0.0 0.0 -2.0>
>>> p
<matrix
0.0 1.0 0.0
1.0 0.0 0.0
0.0 0.0 1.0>
>>> p * v - l * u
<matrix
0.0 0.0 0.0
0.0 0.0 0.0
0.0 0.0 0.0>
# Operate on some column vectors using v.
# The LeftMulColumnVec methods lets us do this without having
# to construct a new GenericMatrix to represent each column vector.
>>> v.LeftMulColumnVec([1.0,2.0,3.0])
[3.0, 6.0, 0.0]
>>> v.LeftMulColumnVec([1.0,-2.0,1.0])
[1.0, 0.0, -2.0]
# Most of the stuff above could be done with something like matlab.
# But, with this package you can do matrix ops for finite fields.
>>> XOR = lambda x,y: x^y
>>> AND = lambda x,y: x&y
>>> DIV = lambda x,y: x
>>> m = GenericMatrix(size=(3,4),zeroElement=0,identityElement=1,add=XOR,mul=AND,sub=XOR,div=DIV)
>>> m.SetRow(0,[0,1,0,0])
>>> m.SetRow(1,[0,1,0,1])
>>> m.SetRow(2,[0,0,1,0])
>>> # You can't invert m since it isn't square, but you can still
>>> # get the LUP decomposition or solve a system of equations.
>>> (l,u,p) = m.LUP()
>>> p*m-l*u
<matrix
0 0 0 0
0 0 0 0
0 0 0 0>
>>> b = [1,0,1]
>>> x = m.Solve(b)
>>> b == m.LeftMulColumnVec(x)
1
"""
def __init__(self, size=(2,2), zeroElement=0.0, identityElement=1.0,
add=operator.__add__, sub=operator.__sub__,
mul=operator.__mul__, div = operator.__div__,
eq = operator.__eq__, str=lambda x:`x`,
equalsZero = None,fillMode='z'):
"""
Function: __init__(size,zeroElement,identityElement,
add,sub,mul,div,eq,str,equalsZero,fillMode)
Description: This is the constructor for the GenericMatrix
class. All arguments are optional and default
to producing a 2-by-2 zero matrix for floats.
A detailed description of arguments follows:
size: A tuple of the form (numRows, numColumns)
zeroElement: An object representing the additive
identity (i.e. 'zero') for the data
type of interest.
identityElement: An object representing the multiplicative
identity (i.e. 'one') for the data
type of interest.
add,sub,mul,div: Functions implementing basic arithmetic
operations for the type of interest.
eq: A function such that eq(x,y) == 1 if and only if x == y.
str: A function used to produce a string representation of
the type of interest.
equalsZero: A function used to decide if an element is
essentially zero. For floats, you could use
lambda x: abs(x) < 1e-6.
fillMode: This can either be 'e' in which case the contents
of the matrix are left empty, 'z', in which case
the matrix is filled with zeros, 'i' in which
case an identity matrix is created, or a two
argument function which is called with the row
and column of each index and produces the value
for that entry. Default is 'z'.
"""
if (None == equalsZero):
equalsZero = lambda x: self.eq(self.zeroElement,x)
self.equalsZero = equalsZero
self.add = add
self.sub = sub
self.mul = mul
self.div = div
self.eq = eq
self.str = str
self.zeroElement = zeroElement
self.identityElement = identityElement
self.rows, self.cols = size
self.data = []
def q(x,y,z):
if (x):
return y
else:
return z
if (fillMode == 'e'):
return
elif (fillMode == 'z'):
fillMode = lambda x,y: self.zeroElement
elif (fillMode == 'i'):
fillMode = lambda x,y: q(self.eq(x,y),self.identityElement,
self.zeroElement)
for i in range(self.rows):
self.data.append(map(fillMode,[i]*self.cols,range(self.cols)))
def MakeSimilarMatrix(self,size,fillMode):
"""
MakeSimilarMatrix(self,size,fillMode)
Return a matrix of the given size filled according to fillMode
with the same zeroElement, identityElement, add, sub, etc.
as self.
For example, self.MakeSimilarMatrix(self.Size(),'i') returns
an identity matrix of the same shape as self.
"""
return GenericMatrix(size=size,zeroElement=self.zeroElement,
identityElement=self.identityElement,
add=self.add,sub=self.sub,
mul=self.mul,div=self.div,eq=self.eq,
str=self.str,equalsZero=self.equalsZero,
fillMode=fillMode)
def __repr__(self):
m = 0
# find the fattest element
for r in self.data:
for c in r:
l = len(self.str(c))
if l > m:
m = l
f = '%%%ds' % (m+1)
s = '<matrix'
for r in self.data:
s = s + '\n'
for c in r:
s = s + (f % self.str(c))
s = s + '>'
return s
def __mul__(self,other):
if (self.cols != other.rows):
raise ValueError, "dimension mismatch"
result = self.MakeSimilarMatrix((self.rows,other.cols),'z')
for i in range(self.rows):
for j in range(other.cols):
result.data[i][j] = reduce(self.add,
map(self.mul,self.data[i],
other.GetColumn(j)))
return result
def __add__(self,other):
if (self.cols != other.cols or self.rows != other.rows):
raise ValueError, "dimension mismatch"
result = self.MakeSimilarMatrix(size=self.Size(),fillMode='z')
for i in range(self.rows):
for j in range(other.cols):
result.data[i][j] = self.add(self.data[i][j],other.data[i][j])
return result
def __sub__(self,other):
if (self.cols != other.cols or self.rows != other.rows):
raise ValueError, "dimension mismatch"
result = self.MakeSimilarMatrix(size=self.Size(),fillMode='z')
for i in range(self.rows):
for j in range(other.cols):
result.data[i][j] = self.sub(self.data[i][j],
other.data[i][j])
return result
def __setitem__ (self, (x,y), data):
"__setitem__((x,y),data) sets item row x and column y to data."
self.data[x][y] = data
def __getitem__ (self, (x,y)):
"__getitem__((x,y)) gets item at row x and column y."
return self.data[x][y]
def Size (self):
"returns (rows, columns)"
return (len(self.data), len(self.data[0]))
def SetRow(self,r,result):
"SetRow(r,result) sets row r to result."
assert len(result) == self.cols, ('Wrong # columns in row: ' +
'expected ' + `self.cols` + ', got '
+ `len(result)`)
self.data[r] = list(result)
def GetRow(self,r):
"GetRow(r) returns a copy of row r."
return list(self.data[r])
def GetColumn(self,c):
"GetColumn(c) returns a copy of column c."
if (c >= self.cols):
raise ValueError, 'matrix does not have that many columns'
result = []
for r in self.data:
result.append(r[c])
return result
def Transpose(self):
oldData = self.data
self.data = []
for r in range(self.cols):
self.data.append([])
for c in range(self.rows):
self.data[r].append(oldData[c][r])
rows = self.rows
self.rows = self.cols
self.cols = rows
def Copy(self):
result = self.MakeSimilarMatrix(size=self.Size(),fillMode='e')
for r in self.data:
result.data.append(list(r))
return result
def SubMatrix(self,rowStart,rowEnd,colStart=0,colEnd=None):
"""
SubMatrix(self,rowStart,rowEnd,colStart,colEnd)
Create and return a sub matrix containing rows
rowStart through rowEnd (inclusive) and columns
colStart through colEnd (inclusive).
"""
if (colEnd == None):
colEnd = self.cols-1
if (rowEnd >= self.rows):
raise ValueError, 'rowEnd too big: rowEnd >= self.rows'
result = self.MakeSimilarMatrix((rowEnd-rowStart+1,colEnd-colStart+1),
'e')
for i in range(rowStart,rowEnd+1):
result.data.append(list(self.data[i][colStart:(colEnd+1)]))
return result
def UnSubMatrix(self,rowStart,rowEnd,colStart,colEnd):
"""
UnSubMatrix(self,rowStart,rowEnd,colStart,colEnd)
Create and return a sub matrix containing everything except
rows rowStart up to (but not including) rowEnd
and columns colStart up to (but not including) colEnd.
"""
result = self.MakeSimilarMatrix((self.rows-(rowEnd-rowStart),
self.cols-(colEnd-colStart)),'e')
for i in range(0,rowStart) + range(rowEnd,self.rows):
result.data.append(list(self.data[i][0:colStart] +
self.data[i][colEnd:]))
return result
def SwapRows(self,i,j):
temp = list(self.data[i])
self.data[i] = list(self.data[j])
self.data[j] = temp
def MulRow(self,r,m,start=0):
"""
Function: MulRow(r,m,start=0)
Multiply row r by m starting at optional column start (default 0).
"""
row = self.data[r]
for i in range(start,self.cols):
row[i] = self.mul(row[i],m)
def AddRow(self,i,j):
"""
Add row i to row j.
"""
self.data[j] = map(self.add,self.data[i],self.data[j])
def AddCol(self,i,j):
"""
Add column i to column j.
"""
for r in range(self.rows):
self.data[r][j] = self.add(self.data[r][i],self.data[r][j])
def MulAddRow(self,m,i,j):
"""
Multiply row i by m and add to row j.
"""
self.data[j] = map(self.add,
map(self.mul,[m]*self.cols,self.data[i]),
self.data[j])
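# For instance, with the default float matrix, MulAddRow(2.0, 0, 1)
# replaces row 1 by row1 + 2.0*row0: rows [1.0, 2.0] and [10.0, 20.0]
# become [1.0, 2.0] and [12.0, 24.0]. Together with SwapRows, MulRow,
# AddRow and AddCol these are the elementary operations used by the
# elimination routines below.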
def LeftMulColumnVec(self,colVec):
"""
Function: LeftMulColumnVec(c)
Purpose: Compute the result of self * c.
Description: This function takes as input a list c,
computes the desired result, and returns it
as a list. This is sometimes more convenient
than constructing a new GenericMatrix to represent
c, computing the result, and extracting it to a list.
"""
if (self.cols != len(colVec)):
raise ValueError, 'dimension mismatch'
result = range(self.rows)
for r in range(self.rows):
result[r] = reduce(self.add,map(self.mul,self.data[r],colVec))
return result
def FindRowLeader(self,startRow,c):
for r in range(startRow,self.rows):
if (not self.eq(self.zeroElement,self.data[r][c])):
return r
return -1
def FindColLeader(self,r,startCol):
for c in range(startCol,self.cols):
if (not self.equalsZero(self.data[r][c])):
return c
return -1
def PartialLowerGaussElim(self,rowIndex,colIndex,resultInv):
"""
Function: PartialLowerGaussElim(rowIndex,colIndex,resultInv)
This function does partial Gaussian elimination on the part of
the matrix on and below the main diagonal starting from
rowIndex. In addition to modifying self, this function
applies the required elementary row operations to the input
matrix resultInv.
By partial, what we mean is that if this function encounters
an element on the diagonal which is 0, it stops and returns
the corresponding rowIndex. The caller can then permute
self or apply some other operation to eliminate the zero
and then call PartialLowerGaussElim again.
This function is meant to be combined with UpperInverse
to compute inverses and LU decompositions.
"""
lastRow = self.rows-1
while (rowIndex < lastRow):
if (colIndex >= self.cols):
return (rowIndex, colIndex)
if (self.eq(self.zeroElement,self.data[rowIndex][colIndex])):
# self[rowIndex,colIndex] = 0 so quit.
return (rowIndex, colIndex)
divisor = self.div(self.identityElement,
self.data[rowIndex][colIndex])
for k in range(rowIndex+1,self.rows):
nextTerm = self.data[k][colIndex]
if (self.zeroElement != nextTerm):
multiple = self.mul(divisor,self.sub(self.zeroElement,
nextTerm))
self.MulAddRow(multiple,rowIndex,k)
resultInv.MulAddRow(multiple,rowIndex,k)
rowIndex = rowIndex + 1
colIndex = colIndex + 1
return (rowIndex, colIndex)
def LowerGaussianElim(self,resultInv=''):
"""
Function: LowerGaussianElim(r)
Purpose: Perform Gaussian elimination on self to eliminate
all terms below the diagonal.
Description: This method modifies self via Gaussian elimination
and applies the elementary row operations used in
this transformation to the input matrix, r
(if one is provided, otherwise a matrix with
identity elements on the main diagonal is
created to serve the role of r).
Thus if the input, r, is an identity matrix, after
the call it will represent the transformation
made to perform Gaussian elimination.
The matrix r is returned.
"""
if (resultInv == ''):
resultInv = self.MakeSimilarMatrix(self.Size(),'i')
(rowIndex,colIndex) = (0,0)
lastRow = min(self.rows - 1,self.cols)
lastCol = self.cols - 1
while( rowIndex < lastRow and colIndex < lastCol):
leader = self.FindRowLeader(rowIndex,colIndex)
if (leader < 0):
colIndex = colIndex + 1
continue
if (leader != rowIndex):
resultInv.AddRow(leader,rowIndex)
self.AddRow(leader,rowIndex)
(rowIndex,colIndex) = (
self.PartialLowerGaussElim(rowIndex,colIndex,resultInv))
return resultInv
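# Put differently: every elementary row operation applied to self is
# mirrored into resultInv, so when resultInv starts out as the
# identity, on return (resultInv * original self) equals the
# eliminated matrix now stored in self.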
def UpperInverse(self,resultInv=''):
"""
Function: UpperInverse(resultInv)
Assumes that self is an upper triangular matrix like
[a b c ... ]
[0 d e ... ]
[0 0 f ... ]
[. . ]
[. . ]
[. . ]
and performs Gaussian elimination to transform self into
the identity matrix. The required elementary row operations
are applied to the matrix resultInv passed as input. For
example, if the identity matrix is passed as input, then the
value returned is the inverse of self before the function
was called.
If no matrix, resultInv, is provided as input then one is
created with identity elements along the main diagonal.
In either case, resultInv is returned as output.
"""
if (resultInv == ''):
resultInv = self.MakeSimilarMatrix(self.Size(),'i')
lastCol = min(self.rows,self.cols)
for colIndex in range(0,lastCol):
if (self.zeroElement == self.data[colIndex][colIndex]):
raise ValueError, 'matrix not invertible'
divisor = self.div(self.identityElement,
self.data[colIndex][colIndex])
if (self.identityElement != divisor):
self.MulRow(colIndex,divisor,colIndex)
resultInv.MulRow(colIndex,divisor)
for rowToElim in range(0,colIndex):
multiple = self.sub(self.zeroElement,
self.data[rowToElim][colIndex])
self.MulAddRow(multiple,colIndex,rowToElim)
resultInv.MulAddRow(multiple,colIndex,rowToElim)
return resultInv
def Inverse(self):
"""
Function: Inverse
Description: Returns the inverse of self without modifying
self. An exception is raised if the matrix
is not invertable.
"""
workingCopy = self.Copy()
result = self.MakeSimilarMatrix(self.Size(),'i')
workingCopy.LowerGaussianElim(result)
workingCopy.UpperInverse(result)
return result
def Determinant(self):
"""
Function: Determinant
Description: Returns the determinant of the matrix or raises
a ValueError if the matrix is not square.
"""
if (self.rows != self.cols):
raise ValueError, 'matrix not square'
workingCopy = self.Copy()
result = self.MakeSimilarMatrix(self.Size(),'i')
workingCopy.LowerGaussianElim(result)
det = self.identityElement
for i in range(self.rows):
det = det * workingCopy.data[i][i]
return det
def LUP(self):
"""
Function: (l,u,p) = self.LUP()
Purpose: Compute the LUP decomposition of self.
Description: This function returns three matrices
l, u, and p such that p * self = l * u
where l, u, and p have the following properties:
l is lower triangular with ones on the diagonal
u is upper triangular
p is a permutation matrix.
The idea behind the algorithm is to first
do Gaussian elimination to obtain an upper
triangular matrix u and lower triangular matrix
r such that r * self = u, then by inverting r to
get l = r ^-1 we obtain self = r^-1 * u = l * u.
Note that since r is lower triangular, its
inverse must also be lower triangular.
Where does the p come in? Well, with some
matrices our technique doesn't work due to
zeros appearing on the diagonal of r. So we
apply some permutations to the original to
prevent this.
"""
upper = self.Copy()
resultInv = self.MakeSimilarMatrix(self.Size(),'i')
perm = self.MakeSimilarMatrix((self.rows,self.rows),'i')
(rowIndex,colIndex) = (0,0)
lastRow = self.rows - 1
lastCol = self.cols - 1
while( rowIndex < lastRow and colIndex < lastCol ):
leader = upper.FindRowLeader(rowIndex,colIndex)
if (leader < 0):
colIndex = colIndex+1
continue
if (leader != rowIndex):
upper.SwapRows(leader,rowIndex)
resultInv.SwapRows(leader,rowIndex)
perm.SwapRows(leader,rowIndex)
(rowIndex,colIndex) = (
upper.PartialLowerGaussElim(rowIndex,colIndex,resultInv))
lower = self.MakeSimilarMatrix((self.rows,self.rows),'i')
resultInv.LowerGaussianElim(lower)
resultInv.UpperInverse(lower)
# possible optimization: do perm*lower explicitly without
# relying on the * operator.
return (perm*lower, upper, perm)
def Solve(self,b):
"""
Solve(self,b):
b: A list.
Returns the values of x such that Ax = b.
This is done using the LUP decomposition by
noting that Ax = b implies PAx = Pb implies LUx = Pb.
First we solve for Ly = Pb and then we solve Ux = y.
The following is an example of how to use Solve:
>>> # Floating point example
>>> import genericmatrix
>>> A = genericmatrix.GenericMatrix(size=(2,5),str=lambda x: '%.4f' % x)
>>> A.SetRow(0,[0.0, 0.0, 0.160, 0.550, 0.280])
>>> A.SetRow(1,[0.0, 0.0, 0.745, 0.610, 0.190])
>>> A
<matrix
0.0000 0.0000 0.1600 0.5500 0.2800
0.0000 0.0000 0.7450 0.6100 0.1900>
>>> b = [0.975, 0.350]
>>> x = A.Solve(b)
>>> z = A.LeftMulColumnVec(x)
>>> diff = reduce(lambda xx,yy: xx+yy,map(lambda aa,bb: abs(aa-bb),b,z))
>>> diff > 1e-6
0
>>> # Boolean example
>>> XOR = lambda x,y: x^y
>>> AND = lambda x,y: x&y
>>> DIV = lambda x,y: x
>>> m=GenericMatrix(size=(3,6),zeroElement=0,identityElement=1,add=XOR,mul=AND,sub=XOR,div=DIV)
>>> m.SetRow(0,[1,0,0,1,0,1])
>>> m.SetRow(1,[0,1,1,0,1,0])
>>> m.SetRow(2,[0,1,0,1,1,0])
>>> b = [0, 1, 1]
>>> x = m.Solve(b)
>>> z = m.LeftMulColumnVec(x)
>>> z
[0, 1, 1]
"""
assert self.cols >= self.rows
(L,U,P) = self.LUP()
Pb = P.LeftMulColumnVec(b)
y = [0]*len(Pb)
for row in range(L.rows):
y[row] = Pb[row]
for i in range(row+1,L.rows):
Pb[i] = L.sub(Pb[i],L.mul(L[i,row],Pb[row]))
x = [0]*self.cols
curRow = self.rows-1
for curRow in range(len(y)-1,-1,-1):
col = U.FindColLeader(curRow,0)
assert col > -1
x[col] = U.div(y[curRow],U[curRow,col])
y[curRow] = x[col]
for i in range(0,curRow):
y[i] = U.sub(y[i],U.mul(U[i,col],y[curRow]))
return x
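# A quick check of the LUP contract on a small float matrix (the
# entries are arbitrary; the zero in the corner forces a row swap):
def _lup_sketch():
    m = GenericMatrix((2, 2))
    m.SetRow(0, [0.0, 1.0])
    m.SetRow(1, [2.0, 3.0])
    (l, u, p) = m.LUP()
    diff = p * m - l * u
    assert max([abs(diff[i, j]) for i in range(2) for j in range(2)]) < 1e-6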
def DotProduct(mul,add,x,y):
"""
Function: DotProduct(mul,add,x,y)
Description: Return the dot product of lists x and y using mul and
add as the multiplication and addition operations.
"""
assert len(x) == len(y), 'sizes do not match'
return reduce(add,map(mul,x,y))
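# And a one-liner sketch of DotProduct over plain integers:
def _dot_product_sketch():
    # 1*4 + 2*5 + 3*6 == 32
    return DotProduct(operator.mul, operator.add, [1, 2, 3], [4, 5, 6])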
class GenericMatrixTester:
def DoTests(self,numTests,sizeList):
"""
Function: DoTests(numTests,sizeList)
Description: For each test, run numTests tests for square
matrices with the sizes in sizeList.
"""
for size in sizeList:
self.RandomInverseTest(size,numTests)
self.RandomLUPTest(size,numTests)
self.RandomSolveTest(size,numTests)
self.RandomDetTest(size,numTests)
def MakeRandom(self,s):
import random
r = GenericMatrix(size=s,fillMode=lambda x,y: random.random(),
equalsZero = lambda x: abs(x) < 1e-6)
return r
def MatAbs(self,m):
r = -1
(N,M) = m.Size()
for i in range(0,N):
for j in range(0,M):
if (abs(m[i,j]) > r):
r = abs(m[i,j])
return r
def RandomInverseTest(self,s,n):
ident = GenericMatrix(size=(s,s),fillMode='i')
for i in range(n):
m = self.MakeRandom((s,s))
assert self.MatAbs(ident - m * m.Inverse()) < 1e-6, (
'offender = ' + `m`)
def RandomLUPTest(self,s,n):
ident = GenericMatrix(size=(s,s),fillMode='i')
for i in range(n):
m = self.MakeRandom((s,s))
(l,u,p) = m.LUP()
assert self.MatAbs(p*m - l*u) < 1e-6, 'offender = ' + `m`
def RandomSolveTest(self,s,n):
import random
if (s <= 1):
return
extraEquations=3
for i in range(n):
m = self.MakeRandom((s,s+extraEquations))
for j in range(extraEquations):
colToKill = random.randrange(s+extraEquations)
for r in range(m.rows):
m[r,colToKill] = 0.0
b = map(lambda x: random.random(), range(s))
x = m.Solve(b)
z = m.LeftMulColumnVec(x)
diff = reduce(lambda xx,yy:xx+yy, map(lambda aa,bb:abs(aa-bb),b,z))
assert diff < 1e-6, ('offenders: m = ' + `m` + '\nx = ' + `x`
+ '\nb = ' + `b` + '\ndiff = ' + `diff`)
def RandomDetTest(self,s,n):
for i in range(n):
m1 = self.MakeRandom((s,s))
m2 = self.MakeRandom((s,s))
prod = m1 * m2
assert (abs(m1.Determinant() * m2.Determinant()
- prod.Determinant() )
< 1e-6), 'offenders = ' + `m1` + `m2`
license_doc = """
This code was originally written by Emin Martinian (emin@allegro.mit.edu).
You may copy, modify, redistribute in source or binary form as long
as credit is given to the original author. Specifically, please
include some kind of comment or docstring saying that Emin Martinian
was one of the original authors. Also, if you publish anything based
on this work, it would be nice to cite the original author and any
other contributors.
There is NO WARRANTY for this software just as there is no warranty
for GNU software (although this is not GNU software). Specifically
we adopt the same policy towards warranties as the GNU project:
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM 'AS IS' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
"""
testing_doc = """
The GenericMatrixTester class contains some simple
testing functions such as RandomInverseTest, RandomLUPTest,
RandomSolveTest, and RandomDetTest which generate random floating
point values and test the appropriate routines. The simplest way to
run these tests is via
>>> import genericmatrix
>>> t = genericmatrix.GenericMatrixTester()
>>> t.DoTests(100,[1,2,3,4,5,10])
# runs 100 tests each for sizes 1-5, 10
# note this may take a few minutes
If any problems occur, assertion errors are raised. Otherwise
nothing is returned. Note that you can also use the doctest
package to test all the python examples in the documentation
by typing 'python genericmatrix.py' or 'python -v genericmatrix.py' at the
command line.
"""
# The following code is used to make the doctest package
# check examples in docstrings.
__test__ = {
'testing_doc' : testing_doc
}
def _test():
import doctest, genericmatrix
return doctest.testmod(genericmatrix)
if __name__ == "__main__":
_test()
print 'Tests Passed.'

src/py_ecc/rs_code.py

@@ -1,246 +0,0 @@
# Copyright Emin Martinian 2002. See below for license terms.
# Version Control Info: $Id: rs_code.py,v 1.5 2003/07/04 01:30:05 emin Exp $
"""
This package implements the RSCode class designed to do
Reed-Solomon encoding and (erasure) decoding. The following
docstrings provide detailed information on various topics.
RSCode.__doc__ Describes the RSCode class and how to use it.
license_doc Describes the license and lack of warranty.
"""
import ffield
import genericmatrix
import math
class RSCode:
"""
The RSCode class implements a Reed-Solomon code
(currently, only erasure decoding not error decoding is
implemented). The relevant methods are:
__init__
Encode
DecodeImmediate
Decode
PrepareDecoder
RandomTest
A brief example of how to use the code follows:
>>> import rs_code
# Create a coder for an (n,k) = (16,8) code and test
# decoding for a simple erasure pattern.
>>> C = rs_code.RSCode(16,8)
>>> inVec = range(8)
>>> codedVec = C.Encode(inVec)
>>> receivedVec = list(codedVec)
# now erase some entries in the encoded vector by setting them to None
>>> receivedVec[3] = None; receivedVec[9] = None; receivedVec[12] = None
>>> receivedVec
[0, 1, 2, None, 4, 5, 6, 7, 8, None, 10, 11, None, 13, 14, 15]
>>> decVec = C.DecodeImmediate(receivedVec)
>>> decVec
[0, 1, 2, 3, 4, 5, 6, 7]
# Now try the random testing method for more complete coverage.
# Note this will take a while.
>>> for k in range(1,8):
... for p in range(1,12):
... C = rs_code.RSCode(k+p,k)
... C.RandomTest(25)
>>> for k in range(1,8):
... for p in range(1,12):
... C = rs_code.RSCode(k+p,k,systematic=0)
... C.RandomTest(25)
"""
def __init__(self,n,k,log2FieldSize=-1,systematic=1,shouldUseLUT=-1):
"""
Function: __init__(n,k,log2FieldSize,systematic,shouldUseLUT)
Purpose: Create a Reed-Solomon coder for an (n,k) code.
Notes: The last parameters, log2FieldSize, systematic
and shouldUseLUT are optional.
The log2FieldSize parameter
represents the base 2 logarithm of the field size.
If it is omitted, the field GF(2^p) is used where
p is the smallest integer such that 2^p >= n.
If systematic is true then a systematic encoder
is created (i.e. one where the first k symbols
of the encoded result always match the data).
If shouldUseLUT = 1 then a lookup table is used for
computing finite field multiplies and divides.
If shouldUseLUT = 0 then no lookup table is used.
If shouldUseLUT = -1 (the default), then the code
decides when a lookup table should be used.
"""
if (log2FieldSize < 0):
log2FieldSize = int(math.ceil(math.log(n)/math.log(2)))
self.field = ffield.FField(log2FieldSize,useLUT=shouldUseLUT)
self.n = n
self.k = k
self.fieldSize = 1 << log2FieldSize
self.CreateEncoderMatrix()
if (systematic):
self.encoderMatrix.Transpose()
self.encoderMatrix.LowerGaussianElim()
self.encoderMatrix.UpperInverse()
self.encoderMatrix.Transpose()
def __repr__(self):
rep = ('<RSCode (n,k) = (' + `self.n` +', ' + `self.k` + ')'
+ ' over GF(2^' + `self.field.n` + ')\n' +
`self.encoderMatrix` + '\n' + '>')
return rep
def CreateEncoderMatrix(self):
self.encoderMatrix = genericmatrix.GenericMatrix(
(self.n,self.k),0,1,self.field.Add,self.field.Subtract,
self.field.Multiply,self.field.Divide)
self.encoderMatrix[0,0] = 1
for i in range(0,self.n):
term = 1
for j in range(0, self.k):
self.encoderMatrix[i,j] = term
term = self.field.Multiply(term,i)
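# The matrix built above is an n-by-k Vandermonde matrix over
# GF(2^log2FieldSize): row i is [1, i, i*i, ...] with all arithmetic
# done in the field, e.g. for (n,k) = (3,2) the rows are [1,0], [1,1]
# and [1,2]. Since the n evaluation points are distinct, any k rows
# are linearly independent, which is why any k codeword symbols
# determine the data.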
def Encode(self,data):
"""
Function: Encode(data)
Purpose: Encode a list of length k into length n.
"""
assert len(data)==self.k, 'Encode: input data must be size k list.'
return self.encoderMatrix.LeftMulColumnVec(data)
def PrepareDecoder(self,unErasedLocations):
"""
Function: PrepareDecoder(unErasedLocations)
Description: The input unErasedLocations is a list of the first
self.k elements of the codeword which were
NOT erased. For example, if the 0th, 5th,
and 7th symbols of a (16,5) code were erased,
then PrepareDecoder([1,2,3,4,6]) would
properly prepare for decoding.
"""
if (len(unErasedLocations) != self.k):
raise ValueError, 'input must be exactly length k'
limitedEncoder = genericmatrix.GenericMatrix(
(self.k,self.k),0,1,self.field.Add,self.field.Subtract,
self.field.Multiply,self.field.Divide)
for i in range(0,self.k):
limitedEncoder.SetRow(
i,self.encoderMatrix.GetRow(unErasedLocations[i]))
self.decoderMatrix = limitedEncoder.Inverse()
def Decode(self,unErasedTerms):
"""
Function: Decode(unErasedTerms)
Purpose: Use the decoder prepared by PrepareDecoder to
recover the original data.
Description: The input unErasedTerms must be the k codeword
symbols at the locations previously passed to
PrepareDecoder, in the same order. Returns the
decoded length-k data list.
"""
return self.decoderMatrix.LeftMulColumnVec(unErasedTerms)
def DecodeImmediate(self,data):
"""
Function: DecodeImmediate(data)
Description: Takes as input a data vector of length self.n
where erased symbols are set to None and
returns the decoded result provided that
at least self.k symbols are not None.
For example, for an (n,k) = (6,4) code, a
decodable input vector would be
[2, 0, None, 1, 2, None].
"""
if (len(data) != self.n):
raise ValueError, 'input must be a length n list'
unErasedLocations = []
unErasedTerms = []
for i in range(self.n):
if (None != data[i]):
unErasedLocations.append(i)
unErasedTerms.append(data[i])
self.PrepareDecoder(unErasedLocations[0:self.k])
return self.Decode(unErasedTerms[0:self.k])
def RandomTest(self,numTests):
import random
maxErasures = self.n-self.k
for i in range(numTests):
inVec = range(self.k)
for j in range(self.k):
inVec[j] = random.randint(0, (1<<self.field.n)-1)
codedVec = self.Encode(inVec)
numErasures = random.randint(0,maxErasures)
for j in range(numErasures):
    pos = random.randint(0,self.n-1)
    while (codedVec[pos] == None):
        pos = random.randint(0,self.n-1)
    codedVec[pos] = None
decVec = self.DecodeImmediate(codedVec)
assert decVec == inVec, ('inVec = ' + `inVec`
+ '\ncodedVec = ' + `codedVec`
+ '\ndecVec = ' + `decVec`)
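# A compact encode/erase/decode sketch for a systematic (6,4) code
# (the data values are arbitrary):
def _rs_sketch():
    C = RSCode(6, 4)
    data = [1, 2, 3, 0]
    coded = C.Encode(data)
    assert coded[0:4] == data # systematic: first k symbols are the data
    coded[1] = None # erase one data symbol...
    coded[5] = None # ...and one parity symbol
    assert C.DecodeImmediate(coded) == data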
license_doc = """
This code was originally written by Emin Martinian (emin@allegro.mit.edu).
You may copy, modify, redistribute in source or binary form as long
as credit is given to the original author. Specifically, please
include some kind of comment or docstring saying that Emin Martinian
was one of the original authors. Also, if you publish anything based
on this work, it would be nice to cite the original author and any
other contributors.
There is NO WARRANTY for this software just as there is no warranty
for GNU software (although this is not GNU software). Specifically
we adopt the same policy towards warranties as the GNU project:
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM 'AS IS' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.
"""
# The following code is used to make the doctest package
# check examples in docstrings.
def _test():
import doctest, rs_code
return doctest.testmod(rs_code)
if __name__ == "__main__":
_test()
print 'Tests passed'