Fold _encode_tail_segment in to _encode_segment

2025-04-13 13:53:07 +00:00 · 2021-01-01 15:14:47 -05:00 · 2021-01-01 15:14:47 -05:00 · c0358b3e03
commit c0358b3e03
parent 1bdca909fa
1 changed files with 23 additions and 30 deletions
--- a/src/allmydata/immutable/encode.py
+++ b/src/allmydata/immutable/encode.py
@ -255,11 +255,11 @@ class Encoder(object):
            # captures the slot, not the value
            #d.addCallback(lambda res: self.do_segment(i))
            # use this form instead:
-            d.addCallback(lambda res, i=i: self._encode_segment(i))
+            d.addCallback(lambda res, i=i: self._encode_segment(i, is_tail=False))
            d.addCallback(self._send_segment, i)
            d.addCallback(self._turn_barrier)
        last_segnum = self.num_segments - 1
-        d.addCallback(lambda res: self._encode_tail_segment(last_segnum))
+        d.addCallback(lambda res: self._encode_segment(last_segnum, is_tail=True))
        d.addCallback(self._send_segment, last_segnum)
        d.addCallback(self._turn_barrier)

@ -317,8 +317,24 @@ class Encoder(object):
            dl.append(d)
        return self._gather_responses(dl)

-    def _encode_segment(self, segnum):
-        codec = self._codec
+    def _encode_segment(self, segnum, is_tail):
+        """
+        Encode one segment of input into the configured number of shares.
+
+        :param segnum: Ostensibly, the number of the segment to encode.  In
+            reality, this parameter is ignored and the *next* segment is
+            encoded and returned.
+
+        :param bool is_tail: ``True`` if this is the last segment, ``False``
+            otherwise.
+
+        :return: A ``Deferred`` which fires with a two-tuple.  The first
+            element is a list of string-y objects representing the encoded
+            segment data for one of the shares.  The second element is a list
+            of integers giving the share numbers of the shares in the first
+            element.
+        """
+        codec = self._tail_codec if is_tail else self._codec
        start = time.time()

        # the ICodecEncoder API wants to receive a total of self.segment_size
@ -350,9 +366,11 @@ class Encoder(object):
        # footprint to 430KiB at the expense of more hash-tree overhead.

        d = self._gather_data(self.required_shares, input_piece_size,
-                              crypttext_segment_hasher)
+                              crypttext_segment_hasher, allow_short=is_tail)
        def _done_gathering(chunks):
            for c in chunks:
+                # If is_tail then a short trailing chunk will have been padded
+                # by _gather_data
                assert len(c) == input_piece_size
            self._crypttext_hashes.append(crypttext_segment_hasher.digest())
            # during this call, we hit 5*segsize memory
@ -365,31 +383,6 @@ class Encoder(object):
        d.addCallback(_done)
        return d

-    def _encode_tail_segment(self, segnum):
-
-        start = time.time()
-        codec = self._tail_codec
-        input_piece_size = codec.get_block_size()
-
-        crypttext_segment_hasher = hashutil.crypttext_segment_hasher()
-
-        d = self._gather_data(self.required_shares, input_piece_size,
-                              crypttext_segment_hasher, allow_short=True)
-        def _done_gathering(chunks):
-            for c in chunks:
-                # a short trailing chunk will have been padded by
-                # _gather_data
-                assert len(c) == input_piece_size
-            self._crypttext_hashes.append(crypttext_segment_hasher.digest())
-            return codec.encode(chunks)
-        d.addCallback(_done_gathering)
-        def _done(res):
-            elapsed = time.time() - start
-            self._times["cumulative_encoding"] += elapsed
-            return res
-        d.addCallback(_done)
-        return d
-
    def _gather_data(self, num_chunks, input_chunk_size,
                     crypttext_segment_hasher,
                     allow_short=False):