2006-12-14 03:32:35 +00:00
|
|
|
|
|
|
|
"""
|
|
|
|
Read and write chunks from files.
|
|
|
|
|
|
|
|
Version 1.0.0.
|
|
|
|
|
|
|
|
A file is divided into blocks, each of which has size L{BLOCK_SIZE}
|
|
|
|
(except for the last block, which may be smaller). Blocks are encoded
|
|
|
|
into chunks. One publishes the hash of the entire file. Clients
|
|
|
|
who want to download the file first obtain the hash, then the clients
|
|
|
|
can receive chunks in any order. Cryptographic hashing is used to
|
|
|
|
verify each received chunk before writing to disk. Thus it is
|
|
|
|
impossible to download corrupt data if one has the correct file hash.
|
|
|
|
|
|
|
|
One obtains the hash of a complete file via
|
|
|
|
L{CompleteChunkFile.file_hash}. One can read chunks from a complete
|
|
|
|
file by the sequence operations of C{len()} and subscripting on a
|
|
|
|
L{CompleteChunkFile} object. One can open an empty or partially
|
|
|
|
downloaded file with L{PartialChunkFile}, and read and write chunks
|
|
|
|
to this file. A chunk will fail to write if its contents and index
|
|
|
|
are not consistent with the overall file hash passed to
|
|
|
|
L{PartialChunkFile} when the partial chunk file was first created.
|
|
|
|
|
|
|
|
The chunks have an overhead of less than 4% for files of size
|
|
|
|
less than C{10**20} bytes.
|
|
|
|
|
|
|
|
Benchmarks:
|
|
|
|
|
|
|
|
- On a 3 GHz Pentium 3, it took 3.4 minutes to first make a
|
|
|
|
L{CompleteChunkFile} object for a 4 GB file. Up to 10 MB of
|
|
|
|
memory was used as the constructor ran. A metafile filename
|
|
|
|
was passed to the constructor, and so the hash information was
|
|
|
|
written to the metafile. The object used a negligible amount
|
|
|
|
of memory after the constructor was finished.
|
|
|
|
- Creation of L{CompleteChunkFile} objects in future runs of the
|
|
|
|
program took negligible time, since the hash information was
|
|
|
|
already stored in the metafile.
|
|
|
|
|
|
|
|
@var BLOCK_SIZE: Size of a block. See L{BlockFile}.
|
|
|
|
@var MAX_CHUNK_SIZE: Upper bound on the size of a chunk.
|
|
|
|
See L{CompleteChunkFile}.
|
|
|
|
|
|
|
|
free (adj.): unencumbered; not under the control of others
|
|
|
|
Written by Connelly Barnes in 2005 and released into the
|
|
|
|
public domain with no warranty of any kind, either expressed
|
|
|
|
or implied. It probably won't make your computer catch on fire,
|
|
|
|
or eat your children, but it might. Use at your own risk.
|
|
|
|
"""
|
|
|
|
|
2007-03-30 01:12:35 +00:00
|
|
|
from allmydata.util.hashutil import tagged_hash, tagged_pair_hash
|
2006-12-14 03:32:35 +00:00
|
|
|
|
2007-03-30 01:12:35 +00:00
|
|
|
# Version string of this module. The module docstring names the base release
# as 1.0.0; the '-allmydata' suffix presumably marks this as the copy modified
# for allmydata -- TODO confirm.
__version__ = '1.0.0-allmydata'
|
2006-12-14 03:32:35 +00:00
|
|
|
|
|
|
|
# Size of one block of a file. Per the module docstring every block has this
# size except the last one, which may be smaller. Unit is presumably bytes --
# TODO confirm.
BLOCK_SIZE = 65536
|
|
|
|
# Upper bound on the size of a chunk: one full block plus 4096 of headroom.
# NOTE(review): the headroom presumably covers the per-chunk hash data added
# when a block is encoded into a chunk -- confirm against CompleteChunkFile.
MAX_CHUNK_SIZE = BLOCK_SIZE + 4096
|
|
|
|
|
|
|
|
def roundup_pow2(x):
    """
    Return the smallest power of 2 that is not less than integer C{x}.

    Any C{x} that is 1 or smaller (including zero and negative values)
    yields 1.
    """
    power = 1
    # Keep doubling until the candidate reaches or passes x.
    while power < x:
        power <<= 1
    return power
|
|
|
|
|
|
|
|
|
|
|
|
class CompleteBinaryTreeMixin:
    r"""
    Index arithmetic helpers for a complete binary tree kept in a flat,
    list-like container.

    The host class is expected to expose the total number of nodes through
    C{__len__} and the nodes themselves through list subscripting. When the
    host has no C{__len__}, L{parent}, L{lchild}, L{rchild} and L{sibling}
    skip the upper bound check; L{needed_for} always requires C{__len__}.

    Nodes are numbered row by row, starting at the root::

                       0
                  /         \
               1               2
             /   \           /   \
           3       4       5       6
          / \     / \     / \     / \
         7   8   9   10  11  12  13  14

    """

    def parent(self, i):
        """
        Return the index of the parent of node C{i}.

        @raise IndexError: if C{i} is the root (or negative), or lies past
            the end of the tree.
        """
        # The root has no parent, so anything below 1 is rejected.
        if i < 1:
            raise IndexError('index out of range: ' + repr(i))
        if hasattr(self, '__len__') and i >= len(self):
            raise IndexError('index out of range: ' + repr(i))
        return (i - 1) // 2

    def lchild(self, i):
        """
        Return the index of the left child of node C{i}.

        @raise IndexError: if C{i} is negative or the child would lie past
            the end of the tree.
        """
        child = 2 * i + 1
        if i < 0:
            raise IndexError('index out of range: ' + repr(i))
        # The bound is checked against the child, but the message reports i.
        if hasattr(self, '__len__') and child >= len(self):
            raise IndexError('index out of range: ' + repr(i))
        return child

    def rchild(self, i):
        """
        Return the index of the right child of node C{i}.

        @raise IndexError: if C{i} is negative or the child would lie past
            the end of the tree.
        """
        child = 2 * i + 2
        if i < 0:
            raise IndexError('index out of range: ' + repr(i))
        # The bound is checked against the child, but the message reports i.
        if hasattr(self, '__len__') and child >= len(self):
            raise IndexError('index out of range: ' + repr(i))
        return child

    def sibling(self, i):
        """
        Return the index of the node sharing a parent with node C{i}.

        @raise IndexError: propagated from L{parent}, L{lchild} or
            L{rchild} when C{i} has no parent or no sibling in range.
        """
        above = self.parent(i)
        left = self.lchild(above)
        if left == i:
            # i is the left child, so its sibling is the right one.
            return self.rchild(above)
        return left

    def needed_for(self, i):
        """
        Return the node indices required to build the hash chain for C{i}.

        The result lists the sibling of C{i} followed by the sibling of each
        ancestor on the way up, stopping before the root. It is empty when
        C{i} is the root.

        @raise IndexError: if C{i} is not a valid index into the tree.
        """
        if not 0 <= i < len(self):
            raise IndexError('index out of range: ' + repr(i))
        chain = []
        node = i
        while node != 0:
            chain.append(self.sibling(node))
            node = self.parent(node)
        return chain
|
|
|
|
|
|
|
|
|
|
|
|
class HashTree(CompleteBinaryTreeMixin, list):
    r"""
    Compute Merkle hashes at any node in a complete binary tree.

    The object is itself a list holding every node of the tree, root first.

    Tree is indexed like so::

                       0
                  /         \
               1               2
             /   \           /   \
           3       4       5       6
          / \     / \     / \     / \
         7   8   9   10  11  12  13  14    <- List passed to constructor.

    """

    def __init__(self, L):
        """
        Create complete binary tree from list of hash strings.

        The list is padded with extra hashes so its length is a power of 2,
        and the padded list is then used as the bottom row of the hash tree.

        A padding element at bottom-row index C{i} has the value
        C{tagged_hash('Merkle tree empty leaf', '%d' % i)}.

        Each internal node is
        C{tagged_pair_hash('Merkle tree internal node', left, right)} of its
        two children.

        @param L: Sequence of leaf hash strings. It is copied, never
            modified. An empty sequence yields a tree made of a single
            padding leaf.
        """
        # Pad a private copy of the leaves up to the next power of 2.
        leaves = list(L)
        start = len(leaves)
        end = roundup_pow2(start)
        leaves += [tagged_hash('Merkle tree empty leaf', "%d" % i)
                   for i in range(start, end)]

        # Build the rows bottom-up; each pass halves the row length until
        # only the root remains.
        rows = [leaves]
        while len(rows[-1]) != 1:
            last = rows[-1]
            rows.append([tagged_pair_hash('Merkle tree internal node',
                                          last[2 * i], last[2 * i + 1])
                         for i in range(len(last) // 2)])

        # Flatten root-first so that node k's children sit at 2k+1 and 2k+2,
        # matching the index arithmetic in CompleteBinaryTreeMixin.
        rows.reverse()
        self[:] = [node for row in rows for node in row]
|
|
|
|
|