implement URI:LIT, which stores small (<=55B) files inside the URI itself. Fixes #81.

This commit is contained in:
Brian Warner 2007-07-12 13:22:36 -07:00
parent b62252183d
commit def63d193e
5 changed files with 221 additions and 26 deletions

View File

@ -23,6 +23,11 @@ def flush_but_dont_ignore(res):
d.addCallback(_done)
return d
# Payload for test_vdrive. Per its own text, it is deliberately too large to
# be stored inside a LIT URI.
LARGE_DATA = """
This is some data to publish to the virtual drive, which needs to be large
enough to not fit inside a LIT uri.
"""
class SystemTest(testutil.SignalMixin, unittest.TestCase):
def setUp(self):
@ -234,7 +239,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
def test_vdrive(self):
self.basedir = "system/SystemTest/test_vdrive"
self.data = DATA = "Some data to publish to the virtual drive\n"
self.data = LARGE_DATA
d = self.set_up_nodes()
d.addCallback(self.log, "starting publish")
d.addCallback(self._do_publish)
@ -568,7 +573,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
self.failUnless("size: %d\n" % len(self.data) in output)
self.failUnless("num_segments: 1\n" in output)
# segment_size is always a multiple of needed_shares
self.failUnless("segment_size: 50\n" in output)
self.failUnless("segment_size: 125\n" in output)
self.failUnless("total_shares: 100\n" in output)
# keys which are supposed to be present
for key in ("size", "num_segments", "segment_size",

View File

@ -4,7 +4,7 @@ from twisted.python.failure import Failure
from cStringIO import StringIO
from allmydata import upload, encode
from allmydata.uri import unpack_uri
from allmydata.uri import unpack_uri, unpack_lit
from test_encode import FakePeer
@ -15,6 +15,18 @@ class FakeClient:
return [ ("%20d"%fakeid, "%20d"%fakeid, FakePeer(self.mode),)
for fakeid in range(50) ]
# Sample payload shared by the upload tests.
DATA = """
Once upon a time, there was a beautiful princess named Buttercup. She lived
in a magical land where every file was stored securely among millions of
machines, and nobody ever worried about their data being lost ever again.
The End.
"""
# Sanity check: the full payload must exceed the uploader's LIT threshold,
# otherwise the "large" tests would not be testing what they claim to.
assert len(DATA) > upload.Uploader.URI_LIT_SIZE_THRESHOLD
# Payload lengths exercised by the tests: empty, short, and the full DATA.
SIZE_ZERO = 0
SIZE_SMALL = 16
SIZE_LARGE = len(DATA)
class GoodServer(unittest.TestCase):
def setUp(self):
self.node = FakeClient(mode="good")
@ -22,7 +34,13 @@ class GoodServer(unittest.TestCase):
self.u.running = True
self.u.parent = self.node
def _check(self, uri):
def _check_small(self, uri, size):
self.failUnless(isinstance(uri, str))
self.failUnless(uri.startswith("URI:LIT:"))
d = unpack_lit(uri)
self.failUnlessEqual(len(d), size)
def _check_large(self, uri, size):
self.failUnless(isinstance(uri, str))
self.failUnless(uri.startswith("URI:"))
d = unpack_uri(uri)
@ -30,31 +48,76 @@ class GoodServer(unittest.TestCase):
self.failUnlessEqual(len(d['storage_index']), 32)
self.failUnless(isinstance(d['key'], str))
self.failUnlessEqual(len(d['key']), 16)
self.failUnlessEqual(d['size'], size)
def testData(self):
data = "This is some data to upload"
def get_data(self, size):
return DATA[:size]
def test_data_zero(self):
data = self.get_data(SIZE_ZERO)
d = self.u.upload_data(data)
d.addCallback(self._check)
d.addCallback(self._check_small, SIZE_ZERO)
return d
testData.timeout = 300
def testFileHandle(self):
data = "This is some data to upload"
def test_data_small(self):
data = self.get_data(SIZE_SMALL)
d = self.u.upload_data(data)
d.addCallback(self._check_small, SIZE_SMALL)
return d
def test_data_large(self):
data = self.get_data(SIZE_LARGE)
d = self.u.upload_data(data)
d.addCallback(self._check_large, SIZE_LARGE)
return d
def test_filehandle_zero(self):
data = self.get_data(SIZE_ZERO)
d = self.u.upload_filehandle(StringIO(data))
d.addCallback(self._check)
d.addCallback(self._check_small, SIZE_ZERO)
return d
testFileHandle.timeout = 300
def testFilename(self):
fn = "Uploader-testFilename.data"
def test_filehandle_small(self):
data = self.get_data(SIZE_SMALL)
d = self.u.upload_filehandle(StringIO(data))
d.addCallback(self._check_small, SIZE_SMALL)
return d
def test_filehandle_large(self):
data = self.get_data(SIZE_LARGE)
d = self.u.upload_filehandle(StringIO(data))
d.addCallback(self._check_large, SIZE_LARGE)
return d
def test_filename_zero(self):
fn = "Uploader-test_filename_zero.data"
f = open(fn, "wb")
data = "This is some data to upload"
data = self.get_data(SIZE_ZERO)
f.write(data)
f.close()
d = self.u.upload_filename(fn)
d.addCallback(self._check)
d.addCallback(self._check_small, SIZE_ZERO)
return d
def test_filename_small(self):
fn = "Uploader-test_filename_small.data"
f = open(fn, "wb")
data = self.get_data(SIZE_SMALL)
f.write(data)
f.close()
d = self.u.upload_filename(fn)
d.addCallback(self._check_small, SIZE_SMALL)
return d
def test_filename_large(self):
fn = "Uploader-test_filename_large.data"
f = open(fn, "wb")
data = self.get_data(SIZE_LARGE)
f.write(data)
f.close()
d = self.u.upload_filename(fn)
d.addCallback(self._check_large, SIZE_LARGE)
return d
testFilename.test = 300
class FullServer(unittest.TestCase):
def setUp(self):
@ -66,8 +129,8 @@ class FullServer(unittest.TestCase):
def _should_fail(self, f):
self.failUnless(isinstance(f, Failure) and f.check(encode.NotEnoughPeersError))
def testData(self):
data = "This is some data to upload"
def test_data_large(self):
data = DATA
d = self.u.upload_data(data)
d.addBoth(self._should_fail)
return d

View File

@ -0,0 +1,84 @@
from twisted.trial import unittest
from allmydata import uri
from allmydata.util import hashutil
class LIT(unittest.TestCase):
    """Round-trip checks for literal ("URI:LIT:") URIs."""

    def _roundtrip(self, payload):
        # Pack the payload, confirm the URI is classified as LIT, and
        # confirm that unpacking yields the original bytes.
        packed = uri.pack_lit(payload)
        self.failUnlessEqual(uri.get_uri_type(packed), "LIT")
        self.failUnlessEqual(uri.unpack_lit(packed), payload)

    def test_pack(self):
        self._roundtrip("This is some small data")

    def test_nonascii(self):
        # Payload with a NUL byte, a newline, and an embedded "URI:LIT:".
        self._roundtrip("This contains \x00 and URI:LIT: and \n, oh my.")
class CHK(unittest.TestCase):
    """Round-trip check for CHK URIs built by uri.pack_uri."""

    def test_pack(self):
        # Ordered (name, value) pairs so the assertions below run in a
        # fixed order.
        fields = [
            ("storage_index", hashutil.tagged_hash("foo", "bar")),
            ("key", "\x00" * 16),
            ("uri_extension_hash", hashutil.uri_extension_hash("stuff")),
            ("needed_shares", 25),
            ("total_shares", 100),
            ("size", 1234),
        ]

        packed = uri.pack_uri(**dict(fields))
        self.failUnlessEqual(uri.get_uri_type(packed), "CHK")

        # Every field passed to pack_uri must come back unchanged.
        unpacked = uri.unpack_uri(packed)
        for name, expected in fields:
            self.failUnlessEqual(unpacked[name], expected)
class Extension(unittest.TestCase):
    """Round-trip check for the URI extension block pack/unpack helpers."""

    def test_pack(self):
        big_hash = hashutil.tagged_hash("foo", "bar")
        original = {
            "stuff": "value",
            "size": 12,
            "needed_shares": 3,
            "big_hash": big_hash,
        }

        packed = uri.pack_extension(original)
        unpacked = uri.unpack_extension(packed)
        self.failUnlessEqual(unpacked["stuff"], "value")
        self.failUnlessEqual(unpacked["size"], 12)
        self.failUnlessEqual(unpacked["big_hash"], big_hash)

        # Smoke test only: the readable form is produced but not inspected.
        uri.unpack_extension_readable(packed)
class Dirnode(unittest.TestCase):
    """Checks packing, classification and unpacking of dirnode URIs."""

    def test_pack(self):
        furl = "pb://stuff@morestuff:stuff/andstuff"
        writekey = "\x01" * 16
        bogus_uris = ("NOT A DIRNODE URI", "URI:stuff")

        # Read-write URI: recognised as a dirnode and as mutable.
        rw_uri = uri.pack_dirnode_uri(furl, writekey)
        self.failUnless(uri.is_dirnode_uri(rw_uri))
        for bogus in bogus_uris:
            self.failIf(uri.is_dirnode_uri(bogus))
        self.failUnless(uri.is_mutable_dirnode_uri(rw_uri))
        for bogus in bogus_uris:
            self.failIf(uri.is_mutable_dirnode_uri(bogus))
        self.failUnlessEqual(uri.get_uri_type(rw_uri), "DIR")

        # Derived read-only URI: still a dirnode, no longer mutable.
        ro_uri = uri.make_immutable_dirnode_uri(rw_uri)
        self.failUnless(uri.is_dirnode_uri(ro_uri))
        self.failIf(uri.is_mutable_dirnode_uri(ro_uri))
        self.failUnlessEqual(uri.get_uri_type(ro_uri), "DIR-RO")

        # Unpacking the read-write URI returns the furl and the writekey.
        unpacked_rw = uri.unpack_dirnode_uri(rw_uri)
        self.failUnlessEqual(unpacked_rw[0], furl)
        self.failUnlessEqual(unpacked_rw[1], writekey)

        # Unpacking the read-only URI returns the furl and the value of
        # hashutil.dir_read_key_hash(writekey).
        unpacked_ro = uri.unpack_dirnode_uri(ro_uri)
        self.failUnlessEqual(unpacked_ro[0], furl)
        expected_key = hashutil.dir_read_key_hash(writekey)
        self.failUnlessEqual(unpacked_ro[1], expected_key)

View File

@ -6,13 +6,14 @@ from foolscap import Referenceable
from allmydata.util import idlib, hashutil
from allmydata import encode, storageserver
from allmydata.uri import pack_uri
from allmydata.uri import pack_uri, pack_lit
from allmydata.interfaces import IUploadable, IUploader
from allmydata.Crypto.Cipher import AES
from cStringIO import StringIO
import collections, random
class HaveAllPeersError(Exception):
# we use this to jump out of the loop
pass
@ -261,6 +262,20 @@ class FileUploader:
size=self._size,
)
class LiteralUploader:
    """Uploader for files small enough to be embedded in a LIT URI.

    The whole file is read from the filehandle and packed into the URI by
    pack_lit.
    """

    def __init__(self, client, options=None):
        """Remember the client and the options.

        options defaults to a fresh empty dict per instance. The previous
        "options={}" default was a single dict shared by every instance
        created without explicit options.
        """
        self._client = client
        if options is None:
            options = {}
        self._options = options

    def set_filehandle(self, filehandle):
        """Set the file-like object whose contents start() will read."""
        self._filehandle = filehandle

    def start(self):
        """Read the whole file and return a Deferred.

        The Deferred has already fired with the literal URI string.
        """
        # Rewind first: the caller may have left the handle anywhere.
        self._filehandle.seek(0)
        data = self._filehandle.read()
        return defer.succeed(pack_lit(data))
class FileName:
implements(IUploadable)
@ -296,6 +311,7 @@ class Uploader(service.MultiService):
implements(IUploader)
name = "uploader"
uploader_class = FileUploader
URI_LIT_SIZE_THRESHOLD = 55
needed_shares = 25 # Number of shares required to reconstruct a file.
desired_shares = 75 # We will abort an upload unless we can allocate space for at least this many.
@ -341,12 +357,20 @@ class Uploader(service.MultiService):
assert self.running
f = IUploadable(f)
fh = f.get_filehandle()
u = self.uploader_class(self.parent, options)
u.set_filehandle(fh)
u.set_params(self.needed_shares, self.desired_shares, self.total_shares)
plaintext_hash, key, crypttext_hash = self.compute_id_strings(fh)
u.set_encryption_key(key)
u.set_id_strings(crypttext_hash, plaintext_hash)
fh.seek(0,2)
size = fh.tell()
fh.seek(0)
if size <= self.URI_LIT_SIZE_THRESHOLD:
u = LiteralUploader(self.parent, options)
u.set_filehandle(fh)
else:
u = self.uploader_class(self.parent, options)
u.set_filehandle(fh)
u.set_params(self.needed_shares, self.desired_shares,
self.total_shares)
plaintext_hash, key, crypttext_hash = self.compute_id_strings(fh)
u.set_encryption_key(key)
u.set_id_strings(crypttext_hash, plaintext_hash)
d = u.start()
def _done(res):
f.close_filehandle(fh)

View File

@ -2,6 +2,16 @@
import re
from allmydata.util import idlib, hashutil
def get_uri_type(uri):
    """Classify a URI string by its prefix.

    Returns "DIR", "DIR-RO" or "LIT" when the string starts with the
    matching "URI:<type>:" prefix, and "CHK" for any other string that
    starts with "URI:".

    Raises AssertionError if the string does not start with "URI:".
    """
    # Raise explicitly rather than with an assert statement: asserts are
    # stripped under "python -O", which would report arbitrary strings as
    # "CHK". AssertionError is kept so callers see the same exception.
    if not uri.startswith("URI:"):
        raise AssertionError("not a URI: %r" % (uri,))
    for uri_type in ("DIR", "DIR-RO", "LIT"):
        if uri.startswith("URI:%s:" % uri_type):
            return uri_type
    return "CHK"
# the URI shall be an ascii representation of the file. It shall contain
# enough information to retrieve and validate the contents. It shall be
# expressed in a limited character set (namely [TODO]).
@ -87,6 +97,15 @@ def unpack_extension_readable(data):
unpacked[k] = idlib.b2a(unpacked[k])
return unpacked
def pack_lit(data):
    """Return a "URI:LIT:" URI with data, encoded by idlib.b2a, appended."""
    encoded = idlib.b2a(data)
    return "URI:LIT:%s" % encoded
def unpack_lit(uri):
    """Return the data stored in a "URI:LIT:" URI.

    Everything after the "URI:LIT:" prefix is decoded with idlib.a2b.

    Raises AssertionError if uri does not start with "URI:LIT:".
    """
    prefix = "URI:LIT:"
    # Raise explicitly rather than with an assert statement: asserts are
    # stripped under "python -O", which would slice and decode a non-LIT
    # string. AssertionError is kept so callers see the same exception.
    if not uri.startswith(prefix):
        raise AssertionError("not a LIT URI: %r" % (uri,))
    return idlib.a2b(uri[len(prefix):])
def is_dirnode_uri(uri):
return uri.startswith("URI:DIR:") or uri.startswith("URI:DIR-RO:")
def is_mutable_dirnode_uri(uri):