storage: more paranoid handling of bounds and palimpsests in mutable share files

* storage server ignores requests to extend shares by sending a larger new_length
* storage server fills exposed holes (created by sending a write vector whose offset begins after the end of the current data) with zero bytes, to avoid "palimpsest" exposure of previous contents (see the sketch below)
* storage server zeroes out lease info at the old location when moving it to a new location
ref. #1528
Zooko O'Whielacronx 2011-09-12 15:26:55 -07:00
parent 942c5e5162
commit 32f80625c9
3 changed files with 54 additions and 12 deletions
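Below is a minimal sketch, in plain Python rather than Tahoe's actual share-container format, of the "palimpsest" exposure the second bullet above refers to: if the server only tracks a logical data length and never scrubs the bytes between the old end of the data and a new write's offset, stale bytes silently become readable share data.

    import io

    # Stale bytes left behind in the container from earlier contents.
    buf = io.BytesIO(b'write-enabler-or-other-secret!!!')
    data_length = 0                      # the share is logically empty

    # A client writes 4 bytes well past the current end of the data.
    offset, data = 40, b'new!'
    buf.seek(offset)
    buf.write(data)
    data_length = offset + len(data)     # a naive server just bumps the length

    # Everything below data_length is now readable share data, including the
    # stale secret that was never overwritten.
    assert b'secret' in buf.getvalue()[:data_length]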

@@ -209,12 +209,31 @@ class RIStorageServer(RemoteInterface):
necessary. A write vector applied to a share number that did not
exist previously will cause that share to be created.
Each write vector is accompanied by a 'new_length' argument. If
new_length is not None, use it to set the size of the container. This
can be used to pre-allocate space for a series of upcoming writes, or
truncate existing data. If the container is growing, new_length will
be applied before datav. If the container is shrinking, it will be
applied afterwards. If new_length==0, the share will be deleted.
In Tahoe-LAFS v1.8.3 or later (except 1.9.0a1), if you send a write
vector whose offset is beyond the end of the current data, the space
between the end of the current data and the beginning of the write
vector will be filled with zero bytes. In earlier versions the
contents of this space were unspecified (and might end up containing
secrets).
Each write vector is accompanied by a 'new_length' argument, which
can be used to truncate the data. If new_length is not None and it is
less than the current size of the data (after applying all write
vectors), then the data will be truncated to new_length. If
new_length==0, the share will be deleted.
In Tahoe-LAFS v1.8.2 and earlier, new_length could also be used to
enlarge the file by sending a number larger than the size of the data
after applying all write vectors. That behavior was not used, and as
of Tahoe-LAFS v1.8.3 it no longer works and the new_length is ignored
in that case.
If a storage client can rely on a server being of version v1.8.3 or
later, it can extend the file efficiently by writing a single zero
byte just before the new end-of-file. Otherwise it must explicitly
write zeroes to all bytes between the old and new end-of-file. In any
case it should avoid sending new_length larger than the size of the
data after applying all write vectors.
The read vector is used to extract data from all known shares,
*before* any writes have been applied. The same vector is used for

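As a concrete reading of the docstring above, here is a hedged client-side sketch; extend_datav and its server_fills_holes argument are illustrative names, not part of the remote interface. It builds the write vector that grows a mutable share to new_size bytes, using the single-zero-byte trick when the server is known to fill holes and explicit zeroes otherwise.

    def extend_datav(current_size, new_size, server_fills_holes):
        """Return a write vector -- a list of (offset, data) pairs -- that
        grows the share's data from current_size to new_size bytes of zeroes."""
        assert new_size > current_size
        if server_fills_holes:
            # v1.8.3 or later: a single zero byte just before the new
            # end-of-file suffices; the server fills the hole with zero bytes.
            return [(new_size - 1, b'\x00')]
        # Older servers: write the zeroes explicitly so nothing stale is left
        # exposed between the old and new end-of-file.
        return [(current_size, b'\x00' * (new_size - current_size))]

Either way the client should send new_length=None, or a value no larger than the resulting data size, since a larger new_length is now ignored.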

@@ -146,11 +146,19 @@ class MutableShareFile:
            return
        num_extra_leases = self._read_num_extra_leases(f)
        f.seek(old_extra_lease_offset)
        extra_lease_data = f.read(4 + num_extra_leases * self.LEASE_SIZE)
        leases_size = 4 + num_extra_leases * self.LEASE_SIZE
        extra_lease_data = f.read(leases_size)
        # Zero out the old lease info (in order to minimize the chance that
        # it could accidentally be exposed to a reader later, re #1528).
        f.seek(old_extra_lease_offset)
        f.write('\x00' * leases_size)
        f.flush()
        # An interrupt here will corrupt the leases.
        f.seek(new_extra_lease_offset)
        f.write(extra_lease_data)
        # an interrupt here will corrupt the leases, iff the move caused the
        # extra leases to overlap.
        self._write_extra_lease_offset(f, new_extra_lease_offset)

    def _write_share_data(self, f, offset, data):
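Stripped of the container bookkeeping, the new lease-moving behaviour in the hunk above amounts to roughly the following; move_and_scrub is an illustrative helper, not Tahoe's API, and it assumes the leases are being moved to a higher offset because the container only grows.

    def move_and_scrub(f, old_offset, new_offset, size):
        """Relocate `size` bytes within an open binary file from old_offset
        to new_offset, zeroing the old copy so it cannot be re-exposed."""
        f.seek(old_offset)
        blob = f.read(size)
        f.seek(old_offset)
        f.write(b'\x00' * size)
        f.flush()
        # An interrupt between the scrub above and the copy below loses the
        # data, which mirrors the "corrupt the leases" caveat in the real code.
        f.seek(new_offset)
        f.write(blob)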
@@ -161,7 +169,11 @@ class MutableShareFile:
        if offset+length >= data_length:
            # They are expanding their data size.
            if self.DATA_OFFSET+offset+length > extra_lease_offset:
                # TODO: allow containers to shrink. For now, they remain
                # large.
                # Their new data won't fit in the current container, so we
                # have to move the leases. With luck, they're expanding it
                # more than the size of the extra lease block, which will
@@ -175,6 +187,13 @@ class MutableShareFile:
            assert self.DATA_OFFSET+offset+length <= extra_lease_offset
            # Their data now fits in the current container. We must write
            # their new data and modify the recorded data size.
            # Fill any newly exposed empty space with 0's.
            if offset > data_length:
                f.seek(self.DATA_OFFSET+data_length)
                f.write('\x00'*(offset - data_length))
                f.flush()
            new_data_length = offset+length
            self._write_data_length(f, new_data_length)
            # an interrupt here will result in a corrupted share
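Read in isolation, the hole-filling rule added above behaves like the sketch below, shown against a plain in-memory buffer; write_with_zero_fill is illustrative and ignores the container's DATA_OFFSET.

    import io

    def write_with_zero_fill(f, data_length, offset, data):
        """Apply one write vector at `offset`, first zero-filling any gap
        between the current end of the data and the start of the write."""
        if offset > data_length:
            f.seek(data_length)
            f.write(b'\x00' * (offset - data_length))
        f.seek(offset)
        f.write(data)
        return max(data_length, offset + len(data))

    buf = io.BytesIO(b'stale secret bytes, logically deleted')
    new_length = write_with_zero_fill(buf, 0, 10, b'new')
    assert buf.getvalue()[:10] == b'\x00' * 10   # the hole reads as zeroes
    assert new_length == 13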
@@ -398,9 +417,12 @@ class MutableShareFile:
        for (offset, data) in datav:
            self._write_share_data(f, offset, data)
        if new_length is not None:
            self._change_container_size(f, new_length)
            f.seek(self.DATA_LENGTH_OFFSET)
            f.write(struct.pack(">Q", new_length))
            cur_length = self._read_data_length(f)
            if new_length < cur_length:
                self._write_data_length(f, new_length)
                # TODO: if we're going to shrink the share file when the
                # share data has shrunk, then call
                # self._change_container_size() here.
        f.close()

def testv_compare(a, op, b):

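Putting the two hunks above together, the data size recorded after a writev under the new rules can be summarized by this small illustrative function (not part of the codebase):

    def resulting_data_length(cur_length, datav, new_length):
        """Recorded data size after applying the (offset, data) pairs in
        datav and then the optional new_length, per the v1.8.3 behaviour."""
        for (offset, data) in datav:
            cur_length = max(cur_length, offset + len(data))
        if new_length is not None and new_length < cur_length:
            # Truncation is still honored; new_length == 0 deletes the share.
            cur_length = new_length
        # A new_length larger than the data is ignored rather than used to
        # enlarge the container.
        return cur_length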

@@ -222,6 +222,7 @@ class StorageServer(service.MultiService, Referenceable):
              { "maximum-immutable-share-size": remaining_space,
                "tolerates-immutable-read-overrun": True,
                "delete-mutable-shares-with-zero-length-writev": True,
                "fills-holes-with-zero-bytes": True,
                "prevents-read-past-end-of-share-data": True,
                },
              "application-version": str(allmydata.__full_version__),