immutable WriteBucketProxy: use pipeline to speed up uploads by overlapping roundtrips, for #392

Brian Warner 2009-05-18 16:44:22 -07:00
parent e76c6b606f
commit 79437baade


@@ -3,7 +3,7 @@ from zope.interface import implements
 from twisted.internet import defer
 from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \
      FileTooLargeError, HASH_SIZE
-from allmydata.util import mathutil, idlib, observer
+from allmydata.util import mathutil, idlib, observer, pipeline
 from allmydata.util.assertutil import precondition
 from allmydata.storage.server import si_b2a
@@ -93,7 +93,8 @@ class WriteBucketProxy:
     fieldstruct = ">L"
 
     def __init__(self, rref, data_size, block_size, num_segments,
-                 num_share_hashes, uri_extension_size_max, nodeid):
+                 num_share_hashes, uri_extension_size_max, nodeid,
+                 pipeline_size=50000):
         self._rref = rref
         self._data_size = data_size
         self._block_size = block_size
@@ -110,6 +111,12 @@ class WriteBucketProxy:
         self._create_offsets(block_size, data_size)
 
+        # k=3, max_segment_size=128KiB gives us a typical segment of 43691
+        # bytes. Setting the default pipeline_size to 50KB lets us get two
+        # segments onto the wire but not a third, which would keep the pipe
+        # filled.
+        self._pipeline = pipeline.Pipeline(pipeline_size)
+
     def get_allocated_size(self):
         return (self._offsets['uri_extension'] + self.fieldsize +
                 self._uri_extension_size_max)
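
As a quick sanity check on the arithmetic in that comment (a sketch built only from the constants the comment itself states, not from code in this file):

import math

MAX_SEGMENT_SIZE = 128 * 1024   # 131072 bytes, per the comment above
K = 3                           # erasure-coding parameter from the comment
PIPELINE_SIZE = 50000           # the new default

# each share receives roughly ceil(segment/k) bytes per segment
block = math.ceil(MAX_SEGMENT_SIZE / K)
assert block == 43691
# the first write leaves the pipeline gauge under the limit, so a second
# write goes out immediately; two writes put it over, so a third must wait
assert block < PIPELINE_SIZE
assert 2 * block >= PIPELINE_SIZE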
@@ -218,11 +225,19 @@ class WriteBucketProxy:
         return self._write(offset, length+data)
 
     def _write(self, offset, data):
-        # TODO: for small shares, buffer the writes and do just a single call
-        return self._rref.callRemote("write", offset, data)
+        # use a Pipeline to pipeline several writes together. TODO: another
+        # speedup would be to coalesce small writes into a single call: this
+        # would reduce the foolscap CPU overhead per share, but wouldn't
+        # reduce the number of round trips, so it might not be worth the
+        # effort.
+        return self._pipeline.add(len(data),
+                                  self._rref.callRemote, "write", offset, data)
 
     def close(self):
-        return self._rref.callRemote("close")
+        d = self._pipeline.add(0, self._rref.callRemote, "close")
+        d.addCallback(lambda ign: self._pipeline.flush())
+        return d
 
     def abort(self):
         return self._rref.callRemoteOnly("abort")