encoding: fix the last py_ecc problem, tests pass now

2025-05-31 14:50:52 +00:00 · 2007-01-05 00:06:42 -07:00 · 2007-01-05 00:06:42 -07:00 · e1c6ee9dcf
commit e1c6ee9dcf
parent 436409fa4d
2 changed files with 30 additions and 13 deletions
--- a/src/allmydata/encode.py
+++ b/src/allmydata/encode.py
@ -98,6 +98,17 @@ class PyRSEncoder(object):
    # the serialized parameters to strip this padding out on the receiving
    # end.

+    # TODO: this will write a 733kB file called 'ffield.lut.8' in the current
+    # directory the first time it is run, to cache the lookup table for later
+    # use. It appears to take about 15 seconds to create this the first time.
+    # Make sure this file winds up somewhere reasonable.
+
+    # TODO: the encoder/decoder RSCode object depends upon the number of
+    # required/total shares, but not upon the data. We could probably save a
+    # lot of initialization time by caching a single instance and reusing it
+    # whenever we see the same required/total share numbers (which will
+    # probably be every time).
+
    def set_params(self, data_size, required_shares, total_shares):
        assert required_shares <= total_shares
        self.data_size = data_size
@ -159,12 +170,13 @@ class PyRSDecoder(object):
        self.share_size = self.num_chunks
        self.encoder = rs_code.RSCode(self.total_shares, self.required_shares,
                                      8)
-        #print "chunk_size: %d" % self.chunk_size
-        #print "num_chunks: %d" % self.num_chunks
-        #print "last_chunk_padding: %d" % self.last_chunk_padding
-        #print "share_size: %d" % self.share_size
-        #print "total_shares: %d" % self.total_shares
-        #print "required_shares: %d" % self.required_shares
+        if False:
+            print "chunk_size: %d" % self.chunk_size
+            print "num_chunks: %d" % self.num_chunks
+            print "last_chunk_padding: %d" % self.last_chunk_padding
+            print "share_size: %d" % self.share_size
+            print "total_shares: %d" % self.total_shares
+            print "required_shares: %d" % self.required_shares

    def decode(self, some_shares):
        chunk_size = self.chunk_size
@ -176,7 +188,6 @@ class PyRSDecoder(object):
        for i in range(self.share_size):
            # this takes one byte from each share, and turns the combination
            # into a single chunk
-            #print "PULLING"
            received_vector = []
            for j in range(self.total_shares):
                share = have_shares.get(j)
@ -186,16 +197,12 @@ class PyRSDecoder(object):
                    received_vector.append(None)
            decoded_vector = self.encoder.DecodeImmediate(received_vector)
            assert len(decoded_vector) == self.chunk_size
-            #print "DECODED: %d" % len(decoded_vector)
            chunk = "".join([chr(x) for x in decoded_vector])
-            #print "appending %d bytes" % len(chunk)
            chunks.append(chunk)
        data = "".join(chunks)
-        #print "pre-stripped length: %d" % len(data)
        if self.last_chunk_padding:
            data = data[:-self.last_chunk_padding]
-        #print "post-stripped length: %d" % len(data)
-        assert len(data) == chunk_size
+        assert len(data) == self.data_size
        return defer.succeed(data)


--- a/src/allmydata/test/test_encode_share.py
+++ b/src/allmydata/test/test_encode_share.py
@ -2,6 +2,7 @@
 import os
 from twisted.trial import unittest
 from twisted.internet import defer
+from twisted.python import log
 from allmydata.encode import PyRSEncoder, PyRSDecoder, ReplicatingEncoder, ReplicatingDecoder
 import random

@ -14,6 +15,7 @@ class Tester:
        enc = self.enc_class()
        enc.set_params(size, required_shares, total_shares)
        serialized_params = enc.get_serialized_params()
+        log.msg("serialized_params: %s" % serialized_params)
        d = enc.encode(data0)
        def _done(shares):
            self.failUnlessEqual(len(shares), total_shares)
@ -31,20 +33,23 @@ class Tester:
            self.failUnless(data1 == data0)

        def _decode_all_ordered(res):
+            log.msg("_decode_all_ordered")
            # can we decode using all of the shares?
            return _decode(self.shares)
        d.addCallback(_decode_all_ordered)
        d.addCallback(_check_data)

        def _decode_all_shuffled(res):
+            log.msg("_decode_all_shuffled")
            # can we decode, using all the shares, but in random order?
            shuffled_shares = self.shares[:]
            random.shuffle(shuffled_shares)
            return _decode(shuffled_shares)
        d.addCallback(_decode_all_shuffled)
        d.addCallback(_check_data)
-        
+
        def _decode_some(res):
+            log.msg("_decode_some")
            # decode with a minimal subset of the shares
            some_shares = self.shares[:required_shares]
            return _decode(some_shares)
@ -52,6 +57,7 @@ class Tester:
        d.addCallback(_check_data)

        def _decode_some_random(res):
+            log.msg("_decode_some_random")
            # use a randomly-selected minimal subset
            some_shares = random.sample(self.shares, required_shares)
            return _decode(some_shares)
@ -59,6 +65,7 @@ class Tester:
        d.addCallback(_check_data)

        def _decode_multiple(res):
+            log.msg("_decode_multiple")
            # make sure we can re-use the decoder object
            shares1 = random.sample(self.shares, required_shares)
            shares2 = random.sample(self.shares, required_shares)
@ -79,6 +86,9 @@ class Tester:
    def test_encode1(self):
        return self.do_test(8, 8, 16)

+    def test_encode2(self):
+        return self.do_test(123, 25, 100)
+
    def test_sizes(self):
        raise unittest.SkipTest("omg this would take forever")
        d = defer.succeed(None)